diff options
Diffstat (limited to 'arch')
436 files changed, 27666 insertions, 10166 deletions
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index f10d2eddd2c..b04f1feb1dd 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -994,7 +994,7 @@ marvel_agp_configure(alpha_agp_info *agp) * rate, but warn the user. */ printk("%s: unknown PLL setting RNGB=%lx (PLL6_CTL=%016lx)\n", - __FUNCTION__, IO7_PLL_RNGB(agp_pll), agp_pll); + __func__, IO7_PLL_RNGB(agp_pll), agp_pll); break; } @@ -1044,13 +1044,13 @@ marvel_agp_translate(alpha_agp_info *agp, dma_addr_t addr) if (addr < agp->aperture.bus_base || addr >= agp->aperture.bus_base + agp->aperture.size) { - printk("%s: addr out of range\n", __FUNCTION__); + printk("%s: addr out of range\n", __func__); return -EINVAL; } pte = aper->arena->ptes[baddr >> PAGE_SHIFT]; if (!(pte & 1)) { - printk("%s: pte not valid\n", __FUNCTION__); + printk("%s: pte not valid\n", __func__); return -EINVAL; } return (pte >> 1) << PAGE_SHIFT; diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c index f5ca5255eb0..c0750291b44 100644 --- a/arch/alpha/kernel/core_t2.c +++ b/arch/alpha/kernel/core_t2.c @@ -336,10 +336,7 @@ t2_direct_map_window1(unsigned long base, unsigned long length) #if DEBUG_PRINT_FINAL_SETTINGS printk("%s: setting WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n", - __FUNCTION__, - *(vulp)T2_WBASE1, - *(vulp)T2_WMASK1, - *(vulp)T2_TBASE1); + __func__, *(vulp)T2_WBASE1, *(vulp)T2_WMASK1, *(vulp)T2_TBASE1); #endif } @@ -366,10 +363,7 @@ t2_sg_map_window2(struct pci_controller *hose, #if DEBUG_PRINT_FINAL_SETTINGS printk("%s: setting WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n", - __FUNCTION__, - *(vulp)T2_WBASE2, - *(vulp)T2_WMASK2, - *(vulp)T2_TBASE2); + __func__, *(vulp)T2_WBASE2, *(vulp)T2_WMASK2, *(vulp)T2_TBASE2); #endif } @@ -377,15 +371,15 @@ static void __init t2_save_configuration(void) { #if DEBUG_PRINT_INITIAL_SETTINGS - printk("%s: HAE_1 was 0x%lx\n", __FUNCTION__, srm_hae); /* HW is 0 */ - printk("%s: HAE_2 was 0x%lx\n", __FUNCTION__, *(vulp)T2_HAE_2); - printk("%s: HAE_3 was 0x%lx\n", __FUNCTION__, *(vulp)T2_HAE_3); - printk("%s: HAE_4 was 0x%lx\n", __FUNCTION__, *(vulp)T2_HAE_4); - printk("%s: HBASE was 0x%lx\n", __FUNCTION__, *(vulp)T2_HBASE); + printk("%s: HAE_1 was 0x%lx\n", __func__, srm_hae); /* HW is 0 */ + printk("%s: HAE_2 was 0x%lx\n", __func__, *(vulp)T2_HAE_2); + printk("%s: HAE_3 was 0x%lx\n", __func__, *(vulp)T2_HAE_3); + printk("%s: HAE_4 was 0x%lx\n", __func__, *(vulp)T2_HAE_4); + printk("%s: HBASE was 0x%lx\n", __func__, *(vulp)T2_HBASE); - printk("%s: WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n", __FUNCTION__, + printk("%s: WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n", __func__, *(vulp)T2_WBASE1, *(vulp)T2_WMASK1, *(vulp)T2_TBASE1); - printk("%s: WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n", __FUNCTION__, + printk("%s: WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n", __func__, *(vulp)T2_WBASE2, *(vulp)T2_WMASK2, *(vulp)T2_TBASE2); #endif diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 819326627b9..319fcb74611 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -365,21 +365,21 @@ void __init titan_init_arch(void) { #if 0 - printk("%s: titan_init_arch()\n", __FUNCTION__); - printk("%s: CChip registers:\n", __FUNCTION__); - printk("%s: CSR_CSC 0x%lx\n", __FUNCTION__, TITAN_cchip->csc.csr); - printk("%s: CSR_MTR 0x%lx\n", __FUNCTION__, TITAN_cchip->mtr.csr); - printk("%s: CSR_MISC 0x%lx\n", __FUNCTION__, TITAN_cchip->misc.csr); - printk("%s: CSR_DIM0 0x%lx\n", __FUNCTION__, TITAN_cchip->dim0.csr); - printk("%s: CSR_DIM1 0x%lx\n", __FUNCTION__, TITAN_cchip->dim1.csr); - printk("%s: CSR_DIR0 0x%lx\n", __FUNCTION__, TITAN_cchip->dir0.csr); - printk("%s: CSR_DIR1 0x%lx\n", __FUNCTION__, TITAN_cchip->dir1.csr); - printk("%s: CSR_DRIR 0x%lx\n", __FUNCTION__, TITAN_cchip->drir.csr); - - printk("%s: DChip registers:\n", __FUNCTION__); - printk("%s: CSR_DSC 0x%lx\n", __FUNCTION__, TITAN_dchip->dsc.csr); - printk("%s: CSR_STR 0x%lx\n", __FUNCTION__, TITAN_dchip->str.csr); - printk("%s: CSR_DREV 0x%lx\n", __FUNCTION__, TITAN_dchip->drev.csr); + printk("%s: titan_init_arch()\n", __func__); + printk("%s: CChip registers:\n", __func__); + printk("%s: CSR_CSC 0x%lx\n", __func__, TITAN_cchip->csc.csr); + printk("%s: CSR_MTR 0x%lx\n", __func__, TITAN_cchip->mtr.csr); + printk("%s: CSR_MISC 0x%lx\n", __func__, TITAN_cchip->misc.csr); + printk("%s: CSR_DIM0 0x%lx\n", __func__, TITAN_cchip->dim0.csr); + printk("%s: CSR_DIM1 0x%lx\n", __func__, TITAN_cchip->dim1.csr); + printk("%s: CSR_DIR0 0x%lx\n", __func__, TITAN_cchip->dir0.csr); + printk("%s: CSR_DIR1 0x%lx\n", __func__, TITAN_cchip->dir1.csr); + printk("%s: CSR_DRIR 0x%lx\n", __func__, TITAN_cchip->drir.csr); + + printk("%s: DChip registers:\n", __func__); + printk("%s: CSR_DSC 0x%lx\n", __func__, TITAN_dchip->dsc.csr); + printk("%s: CSR_STR 0x%lx\n", __func__, TITAN_dchip->str.csr); + printk("%s: CSR_DREV 0x%lx\n", __func__, TITAN_dchip->drev.csr); #endif boot_cpuid = __hard_smp_processor_id(); @@ -700,13 +700,13 @@ titan_agp_translate(alpha_agp_info *agp, dma_addr_t addr) if (addr < agp->aperture.bus_base || addr >= agp->aperture.bus_base + agp->aperture.size) { - printk("%s: addr out of range\n", __FUNCTION__); + printk("%s: addr out of range\n", __func__); return -EINVAL; } pte = aper->arena->ptes[baddr >> PAGE_SHIFT]; if (!(pte & 1)) { - printk("%s: pte not valid\n", __FUNCTION__); + printk("%s: pte not valid\n", __func__); return -EINVAL; } diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c index ef91e09590d..5e7c28f92f1 100644 --- a/arch/alpha/kernel/core_tsunami.c +++ b/arch/alpha/kernel/core_tsunami.c @@ -241,8 +241,6 @@ tsunami_probe_write(volatile unsigned long *vaddr) #define tsunami_probe_read(ADDR) 1 #endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */ -#define FN __FUNCTION__ - static void __init tsunami_init_one_pchip(tsunami_pchip *pchip, int index) { @@ -383,27 +381,27 @@ tsunami_init_arch(void) /* NXMs just don't matter to Tsunami--unless they make it choke completely. */ tmp = (unsigned long)(TSUNAMI_cchip - 1); - printk("%s: probing bogus address: 0x%016lx\n", FN, bogus_addr); + printk("%s: probing bogus address: 0x%016lx\n", __func__, bogus_addr); printk("\tprobe %s\n", tsunami_probe_write((unsigned long *)bogus_addr) ? "succeeded" : "failed"); #endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */ #if 0 - printk("%s: CChip registers:\n", FN); - printk("%s: CSR_CSC 0x%lx\n", FN, TSUNAMI_cchip->csc.csr); - printk("%s: CSR_MTR 0x%lx\n", FN, TSUNAMI_cchip.mtr.csr); - printk("%s: CSR_MISC 0x%lx\n", FN, TSUNAMI_cchip->misc.csr); - printk("%s: CSR_DIM0 0x%lx\n", FN, TSUNAMI_cchip->dim0.csr); - printk("%s: CSR_DIM1 0x%lx\n", FN, TSUNAMI_cchip->dim1.csr); - printk("%s: CSR_DIR0 0x%lx\n", FN, TSUNAMI_cchip->dir0.csr); - printk("%s: CSR_DIR1 0x%lx\n", FN, TSUNAMI_cchip->dir1.csr); - printk("%s: CSR_DRIR 0x%lx\n", FN, TSUNAMI_cchip->drir.csr); + printk("%s: CChip registers:\n", __func__); + printk("%s: CSR_CSC 0x%lx\n", __func__, TSUNAMI_cchip->csc.csr); + printk("%s: CSR_MTR 0x%lx\n", __func__, TSUNAMI_cchip.mtr.csr); + printk("%s: CSR_MISC 0x%lx\n", __func__, TSUNAMI_cchip->misc.csr); + printk("%s: CSR_DIM0 0x%lx\n", __func__, TSUNAMI_cchip->dim0.csr); + printk("%s: CSR_DIM1 0x%lx\n", __func__, TSUNAMI_cchip->dim1.csr); + printk("%s: CSR_DIR0 0x%lx\n", __func__, TSUNAMI_cchip->dir0.csr); + printk("%s: CSR_DIR1 0x%lx\n", __func__, TSUNAMI_cchip->dir1.csr); + printk("%s: CSR_DRIR 0x%lx\n", __func__, TSUNAMI_cchip->drir.csr); printk("%s: DChip registers:\n"); - printk("%s: CSR_DSC 0x%lx\n", FN, TSUNAMI_dchip->dsc.csr); - printk("%s: CSR_STR 0x%lx\n", FN, TSUNAMI_dchip->str.csr); - printk("%s: CSR_DREV 0x%lx\n", FN, TSUNAMI_dchip->drev.csr); + printk("%s: CSR_DSC 0x%lx\n", __func__, TSUNAMI_dchip->dsc.csr); + printk("%s: CSR_STR 0x%lx\n", __func__, TSUNAMI_dchip->str.csr); + printk("%s: CSR_DREV 0x%lx\n", __func__, TSUNAMI_dchip->drev.csr); #endif /* With multiple PCI busses, we play with I/O as physical addrs. */ ioport_resource.end = ~0UL; diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c index 026ba9af6d6..ebc3c894b5a 100644 --- a/arch/alpha/kernel/module.c +++ b/arch/alpha/kernel/module.c @@ -120,6 +120,12 @@ module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs, nsyms = symtab->sh_size / sizeof(Elf64_Sym); chains = kcalloc(nsyms, sizeof(struct got_entry), GFP_KERNEL); + if (!chains) { + printk(KERN_ERR + "module %s: no memory for symbol chain buffer\n", + me->name); + return -ENOMEM; + } got->sh_size = 0; got->sh_addralign = 8; diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 78357798b6f..baf57563b14 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -208,7 +208,7 @@ pdev_save_srm_config(struct pci_dev *dev) tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); if (!tmp) { - printk(KERN_ERR "%s: kmalloc() failed!\n", __FUNCTION__); + printk(KERN_ERR "%s: kmalloc() failed!\n", __func__); return; } tmp->next = srm_saved_configs; diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index dd6e334ab9e..2179c602032 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -79,25 +79,21 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base, #ifdef CONFIG_DISCONTIGMEM - if (!NODE_DATA(nid) || - (NULL == (arena = alloc_bootmem_node(NODE_DATA(nid), - sizeof(*arena))))) { - printk("%s: couldn't allocate arena from node %d\n" - " falling back to system-wide allocation\n", - __FUNCTION__, nid); - arena = alloc_bootmem(sizeof(*arena)); - } - - if (!NODE_DATA(nid) || - (NULL == (arena->ptes = __alloc_bootmem_node(NODE_DATA(nid), - mem_size, - align, - 0)))) { - printk("%s: couldn't allocate arena ptes from node %d\n" - " falling back to system-wide allocation\n", - __FUNCTION__, nid); - arena->ptes = __alloc_bootmem(mem_size, align, 0); - } + arena = alloc_bootmem_node(NODE_DATA(nid), sizeof(*arena)); + if (!NODE_DATA(nid) || !arena) { + printk("%s: couldn't allocate arena from node %d\n" + " falling back to system-wide allocation\n", + __func__, nid); + arena = alloc_bootmem(sizeof(*arena)); + } + + arena->ptes = __alloc_bootmem_node(NODE_DATA(nid), mem_size, align, 0); + if (!NODE_DATA(nid) || !arena->ptes) { + printk("%s: couldn't allocate arena ptes from node %d\n" + " falling back to system-wide allocation\n", + __func__, nid); + arena->ptes = __alloc_bootmem(mem_size, align, 0); + } #else /* CONFIG_DISCONTIGMEM */ diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 63c2073401e..2525692db0a 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -755,7 +755,7 @@ smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry, if (atomic_read(&data.unstarted_count) > 0) { long start_time = jiffies; printk(KERN_ERR "%s: initial timeout -- trying long wait\n", - __FUNCTION__); + __func__); timeout = jiffies + 30 * HZ; while (atomic_read(&data.unstarted_count) > 0 && time_before(jiffies, timeout)) @@ -764,7 +764,7 @@ smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry, long delta = jiffies - start_time; printk(KERN_ERR "%s: response %ld.%ld seconds into long wait\n", - __FUNCTION__, delta / HZ, + __func__, delta / HZ, (100 * (delta - ((delta / HZ) * HZ))) / HZ); } } diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c index f7dd081d57f..78ad7cd1bbd 100644 --- a/arch/alpha/kernel/srm_env.c +++ b/arch/alpha/kernel/srm_env.c @@ -199,7 +199,7 @@ srm_env_init(void) printk(KERN_INFO "%s: This Alpha system doesn't " "know about SRM (or you've booted " "SRM->MILO->Linux, which gets " - "misdetected)...\n", __FUNCTION__); + "misdetected)...\n", __func__); return -ENODEV; } diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c index d187d01d2a1..e53a1e1c2f2 100644 --- a/arch/alpha/kernel/sys_alcor.c +++ b/arch/alpha/kernel/sys_alcor.c @@ -259,7 +259,7 @@ alcor_init_pci(void) if (dev && dev->devfn == PCI_DEVFN(6,0)) { alpha_mv.sys.cia.gru_int_req_bits = XLT_GRU_INT_REQ_BITS; printk(KERN_INFO "%s: Detected AS500 or XLT motherboard.\n", - __FUNCTION__); + __func__); } pci_dev_put(dev); } diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index 922143ea1cd..828449cd263 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -80,7 +80,7 @@ io7_get_irq_ctl(unsigned int irq, struct io7 **pio7) if (!(io7 = marvel_find_io7(pid))) { printk(KERN_ERR "%s for nonexistent io7 -- vec %x, pid %d\n", - __FUNCTION__, irq, pid); + __func__, irq, pid); return NULL; } @@ -90,7 +90,7 @@ io7_get_irq_ctl(unsigned int irq, struct io7 **pio7) if (irq >= 0x180) { printk(KERN_ERR "%s for invalid irq -- pid %d adjusted irq %x\n", - __FUNCTION__, pid, irq); + __func__, pid, irq); return NULL; } @@ -110,8 +110,8 @@ io7_enable_irq(unsigned int irq) ctl = io7_get_irq_ctl(irq, &io7); if (!ctl || !io7) { - printk(KERN_ERR "%s: get_ctl failed for irq %x\n", - __FUNCTION__, irq); + printk(KERN_ERR "%s: get_ctl failed for irq %x\n", + __func__, irq); return; } @@ -130,8 +130,8 @@ io7_disable_irq(unsigned int irq) ctl = io7_get_irq_ctl(irq, &io7); if (!ctl || !io7) { - printk(KERN_ERR "%s: get_ctl failed for irq %x\n", - __FUNCTION__, irq); + printk(KERN_ERR "%s: get_ctl failed for irq %x\n", + __func__, irq); return; } diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index 906019cfa68..99a7f19da13 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -454,7 +454,7 @@ sable_lynx_enable_irq(unsigned int irq) spin_unlock(&sable_lynx_irq_lock); #if 0 printk("%s: mask 0x%lx bit 0x%x irq 0x%x\n", - __FUNCTION__, mask, bit, irq); + __func__, mask, bit, irq); #endif } @@ -470,7 +470,7 @@ sable_lynx_disable_irq(unsigned int irq) spin_unlock(&sable_lynx_irq_lock); #if 0 printk("%s: mask 0x%lx bit 0x%x irq 0x%x\n", - __FUNCTION__, mask, bit, irq); + __func__, mask, bit, irq); #endif } @@ -524,7 +524,7 @@ sable_lynx_srm_device_interrupt(unsigned long vector) irq = sable_lynx_irq_swizzle->mask_to_irq[bit]; #if 0 printk("%s: vector 0x%lx bit 0x%x irq 0x%x\n", - __FUNCTION__, vector, bit, irq); + __func__, vector, bit, irq); #endif handle_irq(irq); } diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c index ee7b9009ebb..d4327e461c2 100644 --- a/arch/alpha/kernel/sys_sio.c +++ b/arch/alpha/kernel/sys_sio.c @@ -89,7 +89,7 @@ sio_pci_route(void) /* First, ALWAYS read and print the original setting. */ pci_bus_read_config_dword(pci_isa_hose->bus, PCI_DEVFN(7, 0), 0x60, &orig_route_tab); - printk("%s: PIRQ original 0x%x new 0x%x\n", __FUNCTION__, + printk("%s: PIRQ original 0x%x new 0x%x\n", __func__, orig_route_tab, alpha_mv.sys.sio.route_tab); #if defined(ALPHA_RESTORE_SRM_SETUP) diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 2dc7f9fed21..dc57790250d 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -8,6 +8,7 @@ * This file initializes the trap entry points */ +#include <linux/jiffies.h> #include <linux/mm.h> #include <linux/sched.h> #include <linux/tty.h> @@ -770,7 +771,7 @@ do_entUnaUser(void __user * va, unsigned long opcode, unsigned long reg, struct pt_regs *regs) { static int cnt = 0; - static long last_time = 0; + static unsigned long last_time; unsigned long tmp1, tmp2, tmp3, tmp4; unsigned long fake_reg, *reg_addr = &fake_reg; @@ -781,7 +782,7 @@ do_entUnaUser(void __user * va, unsigned long opcode, with the unaliged access. */ if (!test_thread_flag (TIF_UAC_NOPRINT)) { - if (cnt >= 5 && jiffies - last_time > 5*HZ) { + if (cnt >= 5 && time_after(jiffies, last_time + 5 * HZ)) { cnt = 0; } if (++cnt < 5) { diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c index 37cd547855b..728bb8f3944 100644 --- a/arch/arm/mach-at91/at91sam9261_devices.c +++ b/arch/arm/mach-at91/at91sam9261_devices.c @@ -539,6 +539,17 @@ void __init at91_add_device_lcdc(struct atmel_lcdfb_info *data) at91_set_B_periph(AT91_PIN_PB28, 0); /* LCDD23 */ #endif + if (ARRAY_SIZE(lcdc_resources) > 2) { + void __iomem *fb; + struct resource *fb_res = &lcdc_resources[2]; + size_t fb_len = fb_res->end - fb_res->start + 1; + + fb = ioremap_writecombine(fb_res->start, fb_len); + if (fb) { + memset(fb, 0, fb_len); + iounmap(fb, fb_len); + } + } lcdc_data = *data; platform_device_register(&at91_lcdc_device); } diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c index dbb9a5fc209..054689804e7 100644 --- a/arch/arm/mach-at91/at91sam9rl_devices.c +++ b/arch/arm/mach-at91/at91sam9rl_devices.c @@ -381,6 +381,20 @@ void __init at91_add_device_lcdc(struct atmel_lcdfb_info *data) at91_set_B_periph(AT91_PIN_PC24, 0); /* LCDD22 */ at91_set_B_periph(AT91_PIN_PC25, 0); /* LCDD23 */ +#ifdef CONFIG_FB_INTSRAM + { + void __iomem *fb; + struct resource *fb_res = &lcdc_resources[2]; + size_t fb_len = fb_res->end - fb_res->start + 1; + + fb = ioremap_writecombine(fb_res->start, fb_len); + if (fb) { + memset(fb, 0, fb_len); + iounmap(fb, fb_len); + } + } +#endif + lcdc_data = *data; platform_device_register(&at91_lcdc_device); } diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c index 2687b730e2d..ce48c14f434 100644 --- a/arch/avr32/kernel/setup.c +++ b/arch/avr32/kernel/setup.c @@ -274,6 +274,8 @@ static int __init early_parse_fbmem(char *p) printk(KERN_WARNING "Failed to allocate framebuffer memory\n"); fbmem_size = 0; + } else { + memset(__va(fbmem_start), 0, fbmem_size); } } diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c index 4207a2b5275..5b06ffa15e3 100644 --- a/arch/cris/mm/init.c +++ b/arch/cris/mm/init.c @@ -27,7 +27,6 @@ show_mem(void) printk("\nMem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); i = max_mapnr; while (i-- > 0) { total++; diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 7089c2428b3..a40df80b2eb 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -49,7 +49,7 @@ asmlinkage void insn_access_error(unsigned long esfr1, unsigned long epcr0, unsi info.si_signo = SIGSEGV; info.si_code = SEGV_ACCERR; info.si_errno = 0; - info.si_addr = (void *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc); + info.si_addr = (void __user *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc); force_sig_info(info.si_signo, &info, current); } /* end insn_access_error() */ @@ -73,7 +73,7 @@ asmlinkage void illegal_instruction(unsigned long esfr1, unsigned long epcr0, un epcr0, esr0, esfr1); info.si_errno = 0; - info.si_addr = (void *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc); + info.si_addr = (void __user *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc); switch (__frame->tbr & TBR_TT) { case TBR_TT_ILLEGAL_INSTR: @@ -111,7 +111,8 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0, unsigned long esr0) { static DEFINE_SPINLOCK(atomic_op_lock); - unsigned long x, y, z, *p; + unsigned long x, y, z; + unsigned long __user *p; mm_segment_t oldfs; siginfo_t info; int ret; @@ -128,7 +129,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0, * u32 __atomic_user_cmpxchg32(u32 *ptr, u32 test, u32 new) */ case TBR_TT_ATOMIC_CMPXCHG32: - p = (unsigned long *) __frame->gr8; + p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; y = __frame->gr10; @@ -158,7 +159,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0, * u32 __atomic_kernel_xchg32(void *v, u32 new) */ case TBR_TT_ATOMIC_XCHG32: - p = (unsigned long *) __frame->gr8; + p = (unsigned long __user *) __frame->gr8; y = __frame->gr9; for (;;) { @@ -181,7 +182,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0, * ulong __atomic_kernel_XOR_return(ulong i, ulong *v) */ case TBR_TT_ATOMIC_XOR: - p = (unsigned long *) __frame->gr8; + p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; for (;;) { @@ -205,7 +206,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0, * ulong __atomic_kernel_OR_return(ulong i, ulong *v) */ case TBR_TT_ATOMIC_OR: - p = (unsigned long *) __frame->gr8; + p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; for (;;) { @@ -229,7 +230,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0, * ulong __atomic_kernel_AND_return(ulong i, ulong *v) */ case TBR_TT_ATOMIC_AND: - p = (unsigned long *) __frame->gr8; + p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; for (;;) { @@ -253,7 +254,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0, * int __atomic_user_sub_return(atomic_t *v, int i) */ case TBR_TT_ATOMIC_SUB: - p = (unsigned long *) __frame->gr8; + p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; for (;;) { @@ -277,7 +278,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0, * int __atomic_user_add_return(atomic_t *v, int i) */ case TBR_TT_ATOMIC_ADD: - p = (unsigned long *) __frame->gr8; + p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; for (;;) { @@ -322,7 +323,7 @@ error: info.si_signo = SIGSEGV; info.si_code = SEGV_ACCERR; info.si_errno = 0; - info.si_addr = (void *) __frame->pc; + info.si_addr = (void __user *) __frame->pc; force_sig_info(info.si_signo, &info, current); } @@ -343,7 +344,7 @@ asmlinkage void media_exception(unsigned long msr0, unsigned long msr1) info.si_signo = SIGFPE; info.si_code = FPE_MDAOVF; info.si_errno = 0; - info.si_addr = (void *) __frame->pc; + info.si_addr = (void __user *) __frame->pc; force_sig_info(info.si_signo, &info, current); } /* end media_exception() */ @@ -383,7 +384,7 @@ asmlinkage void memory_access_exception(unsigned long esr0, info.si_addr = NULL; if ((esr0 & (ESRx_VALID | ESR0_EAV)) == (ESRx_VALID | ESR0_EAV)) - info.si_addr = (void *) ear0; + info.si_addr = (void __user *) ear0; force_sig_info(info.si_signo, &info, current); @@ -412,7 +413,7 @@ asmlinkage void data_access_error(unsigned long esfr1, unsigned long esr15, unsi info.si_signo = SIGSEGV; info.si_code = SEGV_ACCERR; info.si_errno = 0; - info.si_addr = (void *) + info.si_addr = (void __user *) (((esr15 & (ESRx_VALID|ESR15_EAV)) == (ESRx_VALID|ESR15_EAV)) ? ear15 : 0); force_sig_info(info.si_signo, &info, current); @@ -446,7 +447,7 @@ asmlinkage void division_exception(unsigned long esfr1, unsigned long esr0, unsi info.si_signo = SIGFPE; info.si_code = FPE_INTDIV; info.si_errno = 0; - info.si_addr = (void *) __frame->pc; + info.si_addr = (void __user *) __frame->pc; force_sig_info(info.si_signo, &info, current); } /* end division_exception() */ diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index cd13e138bd0..3aa6c821449 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -19,6 +19,7 @@ config IA64 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_KVM default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -589,6 +590,8 @@ config MSPEC source "fs/Kconfig" +source "arch/ia64/kvm/Kconfig" + source "lib/Kconfig" # diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index f1645c4f703..ec4cca477f4 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ +core-$(CONFIG_KVM) += arch/ia64/kvm/ drivers-$(CONFIG_PCI) += arch/ia64/pci/ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig new file mode 100644 index 00000000000..7914e482850 --- /dev/null +++ b/arch/ia64/kvm/Kconfig @@ -0,0 +1,49 @@ +# +# KVM configuration +# +config HAVE_KVM + bool + +menuconfig VIRTUALIZATION + bool "Virtualization" + depends on HAVE_KVM || IA64 + default y + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + ---help--- + Support hosting fully virtualized guest machines using hardware + virtualization extensions. You will need a fairly recent + processor equipped with virtualization extensions. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_INTEL + tristate "KVM for Intel Itanium 2 processors support" + depends on KVM && m + ---help--- + Provides support for KVM on Itanium 2 processors equipped with the VT + extensions. + +config KVM_TRACE + bool + +endif # VIRTUALIZATION diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile new file mode 100644 index 00000000000..52353397a1a --- /dev/null +++ b/arch/ia64/kvm/Makefile @@ -0,0 +1,58 @@ +#This Make file is to generate asm-offsets.h and build source. +# + +#Generate asm-offsets.h for vmm module build +offsets-file := asm-offsets.h + +always := $(offsets-file) +targets := $(offsets-file) +targets += arch/ia64/kvm/asm-offsets.s +clean-files := $(addprefix $(objtree)/,$(targets) $(obj)/memcpy.S $(obj)/memset.S) + +# Default sed regexp - multiline due to syntax constraints +define sed-y + "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" +endef + +quiet_cmd_offsets = GEN $@ +define cmd_offsets + (set -e; \ + echo "#ifndef __ASM_KVM_OFFSETS_H__"; \ + echo "#define __ASM_KVM_OFFSETS_H__"; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by Makefile"; \ + echo " *"; \ + echo " */"; \ + echo ""; \ + sed -ne $(sed-y) $<; \ + echo ""; \ + echo "#endif" ) > $@ +endef +# We use internal rules to avoid the "is up to date" message from make +arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c + $(call if_changed_dep,cc_s_c) + +$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s + $(call cmd,offsets) + +# +# Makefile for Kernel-based Virtual Machine module +# + +EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ +EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ + +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) + +kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o +obj-$(CONFIG_KVM) += kvm.o + +FORCE : $(obj)/$(offsets-file) +EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 +kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ + vtlb.o process.o +#Add link memcpy and memset to avoid possible structure assignment error +kvm-intel-objs += ../lib/memset.o ../lib/memcpy.o +obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c new file mode 100644 index 00000000000..4e3dc13a619 --- /dev/null +++ b/arch/ia64/kvm/asm-offsets.c @@ -0,0 +1,251 @@ +/* + * asm-offsets.c Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed + * to extract and format the required data. + * + * Anthony Xu <anthony.xu@intel.com> + * Xiantao Zhang <xiantao.zhang@intel.com> + * Copyright (c) 2007 Intel Corporation KVM support. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/autoconf.h> +#include <linux/kvm_host.h> + +#include "vcpu.h" + +#define task_struct kvm_vcpu + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " (%0) " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : :) + +#define OFFSET(_sym, _str, _mem) \ + DEFINE(_sym, offsetof(_str, _mem)); + +void foo(void) +{ + DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); + DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs)); + + BLANK(); + + DEFINE(VMM_VCPU_META_RR0_OFFSET, + offsetof(struct kvm_vcpu, arch.metaphysical_rr0)); + DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, + offsetof(struct kvm_vcpu, + arch.metaphysical_saved_rr0)); + DEFINE(VMM_VCPU_VRR0_OFFSET, + offsetof(struct kvm_vcpu, arch.vrr[0])); + DEFINE(VMM_VPD_IRR0_OFFSET, + offsetof(struct vpd, irr[0])); + DEFINE(VMM_VCPU_ITC_CHECK_OFFSET, + offsetof(struct kvm_vcpu, arch.itc_check)); + DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET, + offsetof(struct kvm_vcpu, arch.irq_check)); + DEFINE(VMM_VPD_VHPI_OFFSET, + offsetof(struct vpd, vhpi)); + DEFINE(VMM_VCPU_VSA_BASE_OFFSET, + offsetof(struct kvm_vcpu, arch.vsa_base)); + DEFINE(VMM_VCPU_VPD_OFFSET, + offsetof(struct kvm_vcpu, arch.vpd)); + DEFINE(VMM_VCPU_IRQ_CHECK, + offsetof(struct kvm_vcpu, arch.irq_check)); + DEFINE(VMM_VCPU_TIMER_PENDING, + offsetof(struct kvm_vcpu, arch.timer_pending)); + DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, + offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0)); + DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, + offsetof(struct kvm_vcpu, arch.mode_flags)); + DEFINE(VMM_VCPU_ITC_OFS_OFFSET, + offsetof(struct kvm_vcpu, arch.itc_offset)); + DEFINE(VMM_VCPU_LAST_ITC_OFFSET, + offsetof(struct kvm_vcpu, arch.last_itc)); + DEFINE(VMM_VCPU_SAVED_GP_OFFSET, + offsetof(struct kvm_vcpu, arch.saved_gp)); + + BLANK(); + + DEFINE(VMM_PT_REGS_B6_OFFSET, + offsetof(struct kvm_pt_regs, b6)); + DEFINE(VMM_PT_REGS_B7_OFFSET, + offsetof(struct kvm_pt_regs, b7)); + DEFINE(VMM_PT_REGS_AR_CSD_OFFSET, + offsetof(struct kvm_pt_regs, ar_csd)); + DEFINE(VMM_PT_REGS_AR_SSD_OFFSET, + offsetof(struct kvm_pt_regs, ar_ssd)); + DEFINE(VMM_PT_REGS_R8_OFFSET, + offsetof(struct kvm_pt_regs, r8)); + DEFINE(VMM_PT_REGS_R9_OFFSET, + offsetof(struct kvm_pt_regs, r9)); + DEFINE(VMM_PT_REGS_R10_OFFSET, + offsetof(struct kvm_pt_regs, r10)); + DEFINE(VMM_PT_REGS_R11_OFFSET, + offsetof(struct kvm_pt_regs, r11)); + DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET, + offsetof(struct kvm_pt_regs, cr_ipsr)); + DEFINE(VMM_PT_REGS_CR_IIP_OFFSET, + offsetof(struct kvm_pt_regs, cr_iip)); + DEFINE(VMM_PT_REGS_CR_IFS_OFFSET, + offsetof(struct kvm_pt_regs, cr_ifs)); + DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET, + offsetof(struct kvm_pt_regs, ar_unat)); + DEFINE(VMM_PT_REGS_AR_PFS_OFFSET, + offsetof(struct kvm_pt_regs, ar_pfs)); + DEFINE(VMM_PT_REGS_AR_RSC_OFFSET, + offsetof(struct kvm_pt_regs, ar_rsc)); + DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET, + offsetof(struct kvm_pt_regs, ar_rnat)); + + DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET, + offsetof(struct kvm_pt_regs, ar_bspstore)); + DEFINE(VMM_PT_REGS_PR_OFFSET, + offsetof(struct kvm_pt_regs, pr)); + DEFINE(VMM_PT_REGS_B0_OFFSET, + offsetof(struct kvm_pt_regs, b0)); + DEFINE(VMM_PT_REGS_LOADRS_OFFSET, + offsetof(struct kvm_pt_regs, loadrs)); + DEFINE(VMM_PT_REGS_R1_OFFSET, + offsetof(struct kvm_pt_regs, r1)); + DEFINE(VMM_PT_REGS_R12_OFFSET, + offsetof(struct kvm_pt_regs, r12)); + DEFINE(VMM_PT_REGS_R13_OFFSET, + offsetof(struct kvm_pt_regs, r13)); + DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET, + offsetof(struct kvm_pt_regs, ar_fpsr)); + DEFINE(VMM_PT_REGS_R15_OFFSET, + offsetof(struct kvm_pt_regs, r15)); + DEFINE(VMM_PT_REGS_R14_OFFSET, + offsetof(struct kvm_pt_regs, r14)); + DEFINE(VMM_PT_REGS_R2_OFFSET, + offsetof(struct kvm_pt_regs, r2)); + DEFINE(VMM_PT_REGS_R3_OFFSET, + offsetof(struct kvm_pt_regs, r3)); + DEFINE(VMM_PT_REGS_R16_OFFSET, + offsetof(struct kvm_pt_regs, r16)); + DEFINE(VMM_PT_REGS_R17_OFFSET, + offsetof(struct kvm_pt_regs, r17)); + DEFINE(VMM_PT_REGS_R18_OFFSET, + offsetof(struct kvm_pt_regs, r18)); + DEFINE(VMM_PT_REGS_R19_OFFSET, + offsetof(struct kvm_pt_regs, r19)); + DEFINE(VMM_PT_REGS_R20_OFFSET, + offsetof(struct kvm_pt_regs, r20)); + DEFINE(VMM_PT_REGS_R21_OFFSET, + offsetof(struct kvm_pt_regs, r21)); + DEFINE(VMM_PT_REGS_R22_OFFSET, + offsetof(struct kvm_pt_regs, r22)); + DEFINE(VMM_PT_REGS_R23_OFFSET, + offsetof(struct kvm_pt_regs, r23)); + DEFINE(VMM_PT_REGS_R24_OFFSET, + offsetof(struct kvm_pt_regs, r24)); + DEFINE(VMM_PT_REGS_R25_OFFSET, + offsetof(struct kvm_pt_regs, r25)); + DEFINE(VMM_PT_REGS_R26_OFFSET, + offsetof(struct kvm_pt_regs, r26)); + DEFINE(VMM_PT_REGS_R27_OFFSET, + offsetof(struct kvm_pt_regs, r27)); + DEFINE(VMM_PT_REGS_R28_OFFSET, + offsetof(struct kvm_pt_regs, r28)); + DEFINE(VMM_PT_REGS_R29_OFFSET, + offsetof(struct kvm_pt_regs, r29)); + DEFINE(VMM_PT_REGS_R30_OFFSET, + offsetof(struct kvm_pt_regs, r30)); + DEFINE(VMM_PT_REGS_R31_OFFSET, + offsetof(struct kvm_pt_regs, r31)); + DEFINE(VMM_PT_REGS_AR_CCV_OFFSET, + offsetof(struct kvm_pt_regs, ar_ccv)); + DEFINE(VMM_PT_REGS_F6_OFFSET, + offsetof(struct kvm_pt_regs, f6)); + DEFINE(VMM_PT_REGS_F7_OFFSET, + offsetof(struct kvm_pt_regs, f7)); + DEFINE(VMM_PT_REGS_F8_OFFSET, + offsetof(struct kvm_pt_regs, f8)); + DEFINE(VMM_PT_REGS_F9_OFFSET, + offsetof(struct kvm_pt_regs, f9)); + DEFINE(VMM_PT_REGS_F10_OFFSET, + offsetof(struct kvm_pt_regs, f10)); + DEFINE(VMM_PT_REGS_F11_OFFSET, + offsetof(struct kvm_pt_regs, f11)); + DEFINE(VMM_PT_REGS_R4_OFFSET, + offsetof(struct kvm_pt_regs, r4)); + DEFINE(VMM_PT_REGS_R5_OFFSET, + offsetof(struct kvm_pt_regs, r5)); + DEFINE(VMM_PT_REGS_R6_OFFSET, + offsetof(struct kvm_pt_regs, r6)); + DEFINE(VMM_PT_REGS_R7_OFFSET, + offsetof(struct kvm_pt_regs, r7)); + DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET, + offsetof(struct kvm_pt_regs, eml_unat)); + DEFINE(VMM_VCPU_IIPA_OFFSET, + offsetof(struct kvm_vcpu, arch.cr_iipa)); + DEFINE(VMM_VCPU_OPCODE_OFFSET, + offsetof(struct kvm_vcpu, arch.opcode)); + DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause)); + DEFINE(VMM_VCPU_ISR_OFFSET, + offsetof(struct kvm_vcpu, arch.cr_isr)); + DEFINE(VMM_PT_REGS_R16_SLOT, + (((offsetof(struct kvm_pt_regs, r16) + - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f)); + DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, + offsetof(struct kvm_vcpu, arch.mode_flags)); + DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp)); + BLANK(); + + DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd)); + DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs)); + DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET, + offsetof(struct kvm_vcpu, arch.insvc[0])); + DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta)); + DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr)); + + DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4])); + DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5])); + DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12])); + DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13])); + DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0])); + DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1])); + DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0])); + DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1])); + DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2])); + DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0])); + DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16])); + DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18])); + DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19])); + DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21])); + DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24])); + DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27])); + DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28])); + DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29])); + DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30])); + DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36])); + DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40])); + DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64])); + DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65])); + DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0])); + DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2])); + DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8])); + DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0])); + DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0])); + DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2])); + DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3])); + DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32])); + DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33])); + DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0])); + DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr)); + BLANK(); +} diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c new file mode 100644 index 00000000000..6df07324013 --- /dev/null +++ b/arch/ia64/kvm/kvm-ia64.c @@ -0,0 +1,1806 @@ + +/* + * kvm_ia64.c: Basic KVM suppport On Itanium series processors + * + * + * Copyright (C) 2007, Intel Corporation. + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/percpu.h> +#include <linux/gfp.h> +#include <linux/fs.h> +#include <linux/smp.h> +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/bitops.h> +#include <linux/hrtimer.h> +#include <linux/uaccess.h> + +#include <asm/pgtable.h> +#include <asm/gcc_intrin.h> +#include <asm/pal.h> +#include <asm/cacheflush.h> +#include <asm/div64.h> +#include <asm/tlb.h> + +#include "misc.h" +#include "vti.h" +#include "iodev.h" +#include "ioapic.h" +#include "lapic.h" + +static unsigned long kvm_vmm_base; +static unsigned long kvm_vsa_base; +static unsigned long kvm_vm_buffer; +static unsigned long kvm_vm_buffer_size; +unsigned long kvm_vmm_gp; + +static long vp_env_info; + +static struct kvm_vmm_info *kvm_vmm_info; + +static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu); + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + + +struct fdesc{ + unsigned long ip; + unsigned long gp; +}; + +static void kvm_flush_icache(unsigned long start, unsigned long len) +{ + int l; + + for (l = 0; l < (len + 32); l += 32) + ia64_fc(start + l); + + ia64_sync_i(); + ia64_srlz_i(); +} + +static void kvm_flush_tlb_all(void) +{ + unsigned long i, j, count0, count1, stride0, stride1, addr; + long flags; + + addr = local_cpu_data->ptce_base; + count0 = local_cpu_data->ptce_count[0]; + count1 = local_cpu_data->ptce_count[1]; + stride0 = local_cpu_data->ptce_stride[0]; + stride1 = local_cpu_data->ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva, + (u64)opt_handler); + + return iprv.status; +} + +static DEFINE_SPINLOCK(vp_lock); + +void kvm_arch_hardware_enable(void *garbage) +{ + long status; + long tmp_base; + unsigned long pte; + unsigned long saved_psr; + int slot; + + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), + PAGE_KERNEL)); + local_irq_save(saved_psr); + slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (slot < 0) + return; + local_irq_restore(saved_psr); + + spin_lock(&vp_lock); + status = ia64_pal_vp_init_env(kvm_vsa_base ? + VP_INIT_ENV : VP_INIT_ENV_INITALIZE, + __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); + if (status != 0) { + printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); + return ; + } + + if (!kvm_vsa_base) { + kvm_vsa_base = tmp_base; + printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base); + } + spin_unlock(&vp_lock); + ia64_ptr_entry(0x3, slot); +} + +void kvm_arch_hardware_disable(void *garbage) +{ + + long status; + int slot; + unsigned long pte; + unsigned long saved_psr; + unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA); + + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), + PAGE_KERNEL)); + + local_irq_save(saved_psr); + slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (slot < 0) + return; + local_irq_restore(saved_psr); + + status = ia64_pal_vp_exit_env(host_iva); + if (status) + printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n", + status); + ia64_ptr_entry(0x3, slot); +} + +void kvm_arch_check_processor_compat(void *rtn) +{ + *(int *)rtn = 0; +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + + int r; + + switch (ext) { + case KVM_CAP_IRQCHIP: + case KVM_CAP_USER_MEMORY: + + r = 1; + break; + default: + r = 0; + } + return r; + +} + +static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, + gpa_t addr) +{ + struct kvm_io_device *dev; + + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + + return dev; +} + +static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 1; + return 0; +} + +static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct kvm_mmio_req *p; + struct kvm_io_device *mmio_dev; + + p = kvm_get_vcpu_ioreq(vcpu); + + if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) + goto mmio; + vcpu->mmio_needed = 1; + vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr; + vcpu->mmio_size = kvm_run->mmio.len = p->size; + vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; + + if (vcpu->mmio_is_write) + memcpy(vcpu->mmio_data, &p->data, p->size); + memcpy(kvm_run->mmio.data, &p->data, p->size); + kvm_run->exit_reason = KVM_EXIT_MMIO; + return 0; +mmio: + mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr); + if (mmio_dev) { + if (!p->dir) + kvm_iodevice_write(mmio_dev, p->addr, p->size, + &p->data); + else + kvm_iodevice_read(mmio_dev, p->addr, p->size, + &p->data); + + } else + printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); + p->state = STATE_IORESP_READY; + + return 1; +} + +static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p->exit_reason == EXIT_REASON_PAL_CALL) + return kvm_pal_emul(vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 2; + return 0; + } +} + +static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + kvm_sal_emul(vcpu); + return 1; + } else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 3; + return 0; + } + +} + +/* + * offset: address offset to IPI space. + * value: deliver value. + */ +static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, + uint64_t vector) +{ + switch (dm) { + case SAPIC_FIXED: + kvm_apic_set_irq(vcpu, vector, 0); + break; + case SAPIC_NMI: + kvm_apic_set_irq(vcpu, 2, 0); + break; + case SAPIC_EXTINT: + kvm_apic_set_irq(vcpu, 0, 0); + break; + case SAPIC_INIT: + case SAPIC_PMI: + default: + printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); + break; + } +} + +static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, + unsigned long eid) +{ + union ia64_lid lid; + int i; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + if (kvm->vcpus[i]) { + lid.val = VCPU_LID(kvm->vcpus[i]); + if (lid.id == id && lid.eid == eid) + return kvm->vcpus[i]; + } + } + + return NULL; +} + +static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); + struct kvm_vcpu *target_vcpu; + struct kvm_pt_regs *regs; + union ia64_ipi_a addr = p->u.ipi_data.addr; + union ia64_ipi_d data = p->u.ipi_data.data; + + target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); + if (!target_vcpu) + return handle_vm_error(vcpu, kvm_run); + + if (!target_vcpu->arch.launched) { + regs = vcpu_regs(target_vcpu); + + regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; + regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; + + target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (waitqueue_active(&target_vcpu->wq)) + wake_up_interruptible(&target_vcpu->wq); + } else { + vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); + if (target_vcpu != vcpu) + kvm_vcpu_kick(target_vcpu); + } + + return 1; +} + +struct call_data { + struct kvm_ptc_g ptc_g_data; + struct kvm_vcpu *vcpu; +}; + +static void vcpu_global_purge(void *info) +{ + struct call_data *p = (struct call_data *)info; + struct kvm_vcpu *vcpu = p->vcpu; + + if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) + return; + + set_bit(KVM_REQ_PTC_G, &vcpu->requests); + if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) { + vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] = + p->ptc_g_data; + } else { + clear_bit(KVM_REQ_PTC_G, &vcpu->requests); + vcpu->arch.ptc_g_count = 0; + set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); + } +} + +static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); + struct kvm *kvm = vcpu->kvm; + struct call_data call_data; + int i; + call_data.ptc_g_data = p->u.ptc_g_data; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state == + KVM_MP_STATE_UNINITIALIZED || + vcpu == kvm->vcpus[i]) + continue; + + if (waitqueue_active(&kvm->vcpus[i]->wq)) + wake_up_interruptible(&kvm->vcpus[i]->wq); + + if (kvm->vcpus[i]->cpu != -1) { + call_data.vcpu = kvm->vcpus[i]; + smp_call_function_single(kvm->vcpus[i]->cpu, + vcpu_global_purge, &call_data, 0, 1); + } else + printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); + + } + return 1; +} + +static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + return 1; +} + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + + ktime_t kt; + long itc_diff; + unsigned long vcpu_now_itc; + + unsigned long expires; + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; + unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset; + + if (time_after(vcpu_now_itc, vpd->itm)) { + vcpu->arch.timer_check = 1; + return 1; + } + itc_diff = vpd->itm - vcpu_now_itc; + if (itc_diff < 0) + itc_diff = -itc_diff; + + expires = div64_64(itc_diff, cyc_per_usec); + kt = ktime_set(0, 1000 * expires); + vcpu->arch.ht_active = 1; + hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); + + if (irqchip_in_kernel(vcpu->kvm)) { + vcpu->arch.mp_state = KVM_MP_STATE_HALTED; + kvm_vcpu_block(vcpu); + hrtimer_cancel(p_ht); + vcpu->arch.ht_active = 0; + + if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) + return -EINTR; + return 1; + } else { + printk(KERN_ERR"kvm: Unsupported userspace halt!"); + return 0; + } +} + +static int handle_vm_shutdown(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; + return 0; +} + +static int handle_external_interrupt(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + return 1; +} + +static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) = { + [EXIT_REASON_VM_PANIC] = handle_vm_error, + [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio, + [EXIT_REASON_PAL_CALL] = handle_pal_call, + [EXIT_REASON_SAL_CALL] = handle_sal_call, + [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6, + [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown, + [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, + [EXIT_REASON_IPI] = handle_ipi, + [EXIT_REASON_PTC_G] = handle_global_purge, + +}; + +static const int kvm_vti_max_exit_handlers = + sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers); + +static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu) +{ +} + +static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p_exit_data; + + p_exit_data = kvm_get_exit_data(vcpu); + return p_exit_data->exit_reason; +} + +/* + * The guest has exited. See if we can fix it or if we need userspace + * assistance. + */ +static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + u32 exit_reason = kvm_get_exit_reason(vcpu); + vcpu->arch.last_exit = exit_reason; + + if (exit_reason < kvm_vti_max_exit_handlers + && kvm_vti_exit_handlers[exit_reason]) + return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = exit_reason; + } + return 0; +} + +static inline void vti_set_rr6(unsigned long rr6) +{ + ia64_set_rr(RR6, rr6); + ia64_srlz_i(); +} + +static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) +{ + unsigned long pte; + struct kvm *kvm = vcpu->kvm; + int r; + + /*Insert a pair of tr to map vmm*/ + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); + r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (r < 0) + goto out; + vcpu->arch.vmm_tr_slot = r; + /*Insert a pairt of tr to map data of vm*/ + pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL)); + r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE, + pte, KVM_VM_DATA_SHIFT); + if (r < 0) + goto out; + vcpu->arch.vm_tr_slot = r; + r = 0; +out: + return r; + +} + +static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) +{ + + ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); + ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); + +} + +static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + + if (vcpu->arch.last_run_cpu != cpu || + per_cpu(last_vcpu, cpu) != vcpu) { + per_cpu(last_vcpu, cpu) = vcpu; + vcpu->arch.last_run_cpu = cpu; + kvm_flush_tlb_all(); + } + + vcpu->arch.host_rr6 = ia64_get_rr(RR6); + vti_set_rr6(vcpu->arch.vmm_rr); + return kvm_insert_vmm_mapping(vcpu); +} +static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) +{ + kvm_purge_vmm_mapping(vcpu); + vti_set_rr6(vcpu->arch.host_rr6); +} + +static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + union context *host_ctx, *guest_ctx; + int r; + + /*Get host and guest context with guest address space.*/ + host_ctx = kvm_get_host_context(vcpu); + guest_ctx = kvm_get_guest_context(vcpu); + + r = kvm_vcpu_pre_transition(vcpu); + if (r < 0) + goto out; + kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); + kvm_vcpu_post_transition(vcpu); + r = 0; +out: + return r; +} + +static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + +again: + preempt_disable(); + + kvm_prepare_guest_switch(vcpu); + local_irq_disable(); + + if (signal_pending(current)) { + local_irq_enable(); + preempt_enable(); + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + goto out; + } + + vcpu->guest_mode = 1; + kvm_guest_enter(); + + r = vti_vcpu_run(vcpu, kvm_run); + if (r < 0) { + local_irq_enable(); + preempt_enable(); + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + goto out; + } + + vcpu->arch.launched = 1; + vcpu->guest_mode = 0; + local_irq_enable(); + + /* + * We must have an instruction between local_irq_enable() and + * kvm_guest_exit(), so the timer interrupt isn't delayed by + * the interrupt shadow. The stat.exits increment will do nicely. + * But we need to prevent reordering, hence this barrier(): + */ + barrier(); + + kvm_guest_exit(); + + preempt_enable(); + + r = kvm_handle_exit(kvm_run, vcpu); + + if (r > 0) { + if (!need_resched()) + goto again; + } + +out: + if (r > 0) { + kvm_resched(vcpu); + goto again; + } + + return r; +} + +static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) +{ + struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); + + if (!vcpu->mmio_is_write) + memcpy(&p->data, vcpu->mmio_data, 8); + p->state = STATE_IORESP_READY; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + sigset_t sigsaved; + + vcpu_load(vcpu); + + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { + kvm_vcpu_block(vcpu); + vcpu_put(vcpu); + return -EAGAIN; + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + if (vcpu->mmio_needed) { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + kvm_set_mmio_data(vcpu); + vcpu->mmio_read_completed = 1; + vcpu->mmio_needed = 0; + } + r = __vcpu_run(vcpu, kvm_run); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + vcpu_put(vcpu); + return r; +} + +/* + * Allocate 16M memory for every vm to hold its specific data. + * Its memory map is defined in kvm_host.h. + */ +static struct kvm *kvm_alloc_kvm(void) +{ + + struct kvm *kvm; + uint64_t vm_base; + + vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); + + if (!vm_base) + return ERR_PTR(-ENOMEM); + printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base); + + /* Zero all pages before use! */ + memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); + + kvm = (struct kvm *)(vm_base + KVM_VM_OFS); + kvm->arch.vm_base = vm_base; + + return kvm; +} + +struct kvm_io_range { + unsigned long start; + unsigned long size; + unsigned long type; +}; + +static const struct kvm_io_range io_ranges[] = { + {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, + {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, + {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, + {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC}, + {PIB_START, PIB_SIZE, GPFN_PIB}, +}; + +static void kvm_build_io_pmt(struct kvm *kvm) +{ + unsigned long i, j; + + /* Mark I/O ranges */ + for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range)); + i++) { + for (j = io_ranges[i].start; + j < io_ranges[i].start + io_ranges[i].size; + j += PAGE_SIZE) + kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT, + io_ranges[i].type, 0); + } + +} + +/*Use unused rids to virtualize guest rid.*/ +#define GUEST_PHYSICAL_RR0 0x1739 +#define GUEST_PHYSICAL_RR4 0x2739 +#define VMM_INIT_RR 0x1660 + +static void kvm_init_vm(struct kvm *kvm) +{ + long vm_base; + + BUG_ON(!kvm); + + kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; + kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; + kvm->arch.vmm_init_rr = VMM_INIT_RR; + + vm_base = kvm->arch.vm_base; + if (vm_base) { + kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS; + kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS; + kvm->arch.vpd_base = vm_base + KVM_VPD_OFS; + } + + /* + *Fill P2M entries for MMIO/IO ranges + */ + kvm_build_io_pmt(kvm); + +} + +struct kvm *kvm_arch_create_vm(void) +{ + struct kvm *kvm = kvm_alloc_kvm(); + + if (IS_ERR(kvm)) + return ERR_PTR(-ENOMEM); + kvm_init_vm(kvm); + + return kvm; + +} + +static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, + struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_IOAPIC: + memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm), + sizeof(struct kvm_ioapic_state)); + break; + default: + r = -EINVAL; + break; + } + return r; +} + +static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_IOAPIC: + memcpy(ioapic_irqchip(kvm), + &chip->chip.ioapic, + sizeof(struct kvm_ioapic_state)); + break; + default: + r = -EINVAL; + break; + } + return r; +} + +#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + int r; + + vcpu_load(vcpu); + + for (i = 0; i < 16; i++) { + vpd->vgr[i] = regs->vpd.vgr[i]; + vpd->vbgr[i] = regs->vpd.vbgr[i]; + } + for (i = 0; i < 128; i++) + vpd->vcr[i] = regs->vpd.vcr[i]; + vpd->vhpi = regs->vpd.vhpi; + vpd->vnat = regs->vpd.vnat; + vpd->vbnat = regs->vpd.vbnat; + vpd->vpsr = regs->vpd.vpsr; + + vpd->vpr = regs->vpd.vpr; + + r = -EFAULT; + r = copy_from_user(&vcpu->arch.guest, regs->saved_guest, + sizeof(union context)); + if (r) + goto out; + r = copy_from_user(vcpu + 1, regs->saved_stack + + sizeof(struct kvm_vcpu), + IA64_STK_OFFSET - sizeof(struct kvm_vcpu)); + if (r) + goto out; + vcpu->arch.exit_data = + ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data; + + RESTORE_REGS(mp_state); + RESTORE_REGS(vmm_rr); + memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS); + memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS); + RESTORE_REGS(itr_regions); + RESTORE_REGS(dtr_regions); + RESTORE_REGS(tc_regions); + RESTORE_REGS(irq_check); + RESTORE_REGS(itc_check); + RESTORE_REGS(timer_check); + RESTORE_REGS(timer_pending); + RESTORE_REGS(last_itc); + for (i = 0; i < 8; i++) { + vcpu->arch.vrr[i] = regs->vrr[i]; + vcpu->arch.ibr[i] = regs->ibr[i]; + vcpu->arch.dbr[i] = regs->dbr[i]; + } + for (i = 0; i < 4; i++) + vcpu->arch.insvc[i] = regs->insvc[i]; + RESTORE_REGS(xtp); + RESTORE_REGS(metaphysical_rr0); + RESTORE_REGS(metaphysical_rr4); + RESTORE_REGS(metaphysical_saved_rr0); + RESTORE_REGS(metaphysical_saved_rr4); + RESTORE_REGS(fp_psr); + RESTORE_REGS(saved_gp); + + vcpu->arch.irq_new_pending = 1; + vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC); + set_bit(KVM_REQ_RESUME, &vcpu->requests); + + vcpu_put(vcpu); + r = 0; +out: + return r; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + int r = -EINVAL; + + switch (ioctl) { + case KVM_SET_MEMORY_REGION: { + struct kvm_memory_region kvm_mem; + struct kvm_userspace_memory_region kvm_userspace_mem; + + r = -EFAULT; + if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) + goto out; + kvm_userspace_mem.slot = kvm_mem.slot; + kvm_userspace_mem.flags = kvm_mem.flags; + kvm_userspace_mem.guest_phys_addr = + kvm_mem.guest_phys_addr; + kvm_userspace_mem.memory_size = kvm_mem.memory_size; + r = kvm_vm_ioctl_set_memory_region(kvm, + &kvm_userspace_mem, 0); + if (r) + goto out; + break; + } + case KVM_CREATE_IRQCHIP: + r = -EFAULT; + r = kvm_ioapic_init(kvm); + if (r) + goto out; + break; + case KVM_IRQ_LINE: { + struct kvm_irq_level irq_event; + + r = -EFAULT; + if (copy_from_user(&irq_event, argp, sizeof irq_event)) + goto out; + if (irqchip_in_kernel(kvm)) { + mutex_lock(&kvm->lock); + kvm_ioapic_set_irq(kvm->arch.vioapic, + irq_event.irq, + irq_event.level); + mutex_unlock(&kvm->lock); + r = 0; + } + break; + } + case KVM_GET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ + struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_get_irqchip(kvm, &chip); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &chip, sizeof chip)) + goto out; + r = 0; + break; + } + case KVM_SET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ + struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_set_irqchip(kvm, &chip); + if (r) + goto out; + r = 0; + break; + } + default: + ; + } +out: + return r; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; + +} +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + + return -EINVAL; +} + +static int kvm_alloc_vmm_area(void) +{ + if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) { + kvm_vmm_base = __get_free_pages(GFP_KERNEL, + get_order(KVM_VMM_SIZE)); + if (!kvm_vmm_base) + return -ENOMEM; + + memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); + kvm_vm_buffer = kvm_vmm_base + VMM_SIZE; + + printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n", + kvm_vmm_base, kvm_vm_buffer); + } + + return 0; +} + +static void kvm_free_vmm_area(void) +{ + if (kvm_vmm_base) { + /*Zero this area before free to avoid bits leak!!*/ + memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); + free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE)); + kvm_vmm_base = 0; + kvm_vm_buffer = 0; + kvm_vsa_base = 0; + } +} + +/* + * Make sure that a cpu that is being hot-unplugged does not have any vcpus + * cached on it. Leave it as blank for IA64. + */ +void decache_vcpus_on_cpu(int cpu) +{ +} + +static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +} + +static int vti_init_vpd(struct kvm_vcpu *vcpu) +{ + int i; + union cpuid3_t cpuid3; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (IS_ERR(vpd)) + return PTR_ERR(vpd); + + /* CPUID init */ + for (i = 0; i < 5; i++) + vpd->vcpuid[i] = ia64_get_cpuid(i); + + /* Limit the CPUID number to 5 */ + cpuid3.value = vpd->vcpuid[3]; + cpuid3.number = 4; /* 5 - 1 */ + vpd->vcpuid[3] = cpuid3.value; + + /*Set vac and vdc fields*/ + vpd->vac.a_from_int_cr = 1; + vpd->vac.a_to_int_cr = 1; + vpd->vac.a_from_psr = 1; + vpd->vac.a_from_cpuid = 1; + vpd->vac.a_cover = 1; + vpd->vac.a_bsw = 1; + vpd->vac.a_int = 1; + vpd->vdc.d_vmsw = 1; + + /*Set virtual buffer*/ + vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE; + + return 0; +} + +static int vti_create_vp(struct kvm_vcpu *vcpu) +{ + long ret; + struct vpd *vpd = vcpu->arch.vpd; + unsigned long vmm_ivt; + + vmm_ivt = kvm_vmm_info->vmm_ivt; + + printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt); + + ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0); + + if (ret) { + printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n"); + return -EINVAL; + } + return 0; +} + +static void init_ptce_info(struct kvm_vcpu *vcpu) +{ + ia64_ptce_info_t ptce = {0}; + + ia64_get_ptce(&ptce); + vcpu->arch.ptce_base = ptce.base; + vcpu->arch.ptce_count[0] = ptce.count[0]; + vcpu->arch.ptce_count[1] = ptce.count[1]; + vcpu->arch.ptce_stride[0] = ptce.stride[0]; + vcpu->arch.ptce_stride[1] = ptce.stride[1]; +} + +static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu) +{ + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; + + if (hrtimer_cancel(p_ht)) + hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS); +} + +static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) +{ + struct kvm_vcpu *vcpu; + wait_queue_head_t *q; + + vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer); + if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) + goto out; + + q = &vcpu->wq; + if (waitqueue_active(q)) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + wake_up_interruptible(q); + } +out: + vcpu->arch.timer_check = 1; + return HRTIMER_NORESTART; +} + +#define PALE_RESET_ENTRY 0x80000000ffffffb0UL + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *v; + int r; + int i; + long itc_offset; + struct kvm *kvm = vcpu->kvm; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + union context *p_ctx = &vcpu->arch.guest; + struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu); + + /*Init vcpu context for first run.*/ + if (IS_ERR(vmm_vcpu)) + return PTR_ERR(vmm_vcpu); + + if (vcpu->vcpu_id == 0) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + + /*Set entry address for first run.*/ + regs->cr_iip = PALE_RESET_ENTRY; + + /*Initilize itc offset for vcpus*/ + itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); + for (i = 0; i < MAX_VCPU_NUM; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + v->arch.itc_offset = itc_offset; + v->arch.last_itc = 0; + } + } else + vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; + + r = -ENOMEM; + vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); + if (!vcpu->arch.apic) + goto out; + vcpu->arch.apic->vcpu = vcpu; + + p_ctx->gr[1] = 0; + p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); + p_ctx->gr[13] = (unsigned long)vmm_vcpu; + p_ctx->psr = 0x1008522000UL; + p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ + p_ctx->caller_unat = 0; + p_ctx->pr = 0x0; + p_ctx->ar[36] = 0x0; /*unat*/ + p_ctx->ar[19] = 0x0; /*rnat*/ + p_ctx->ar[18] = (unsigned long)vmm_vcpu + + ((sizeof(struct kvm_vcpu)+15) & ~15); + p_ctx->ar[64] = 0x0; /*pfs*/ + p_ctx->cr[0] = 0x7e04UL; + p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt; + p_ctx->cr[8] = 0x3c; + + /*Initilize region register*/ + p_ctx->rr[0] = 0x30; + p_ctx->rr[1] = 0x30; + p_ctx->rr[2] = 0x30; + p_ctx->rr[3] = 0x30; + p_ctx->rr[4] = 0x30; + p_ctx->rr[5] = 0x30; + p_ctx->rr[7] = 0x30; + + /*Initilize branch register 0*/ + p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry; + + vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr; + vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0; + vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4; + + hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + vcpu->arch.hlt_timer.function = hlt_timer_fn; + + vcpu->arch.last_run_cpu = -1; + vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); + vcpu->arch.vsa_base = kvm_vsa_base; + vcpu->arch.__gp = kvm_vmm_gp; + vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); + vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); + vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); + init_ptce_info(vcpu); + + r = 0; +out: + return r; +} + +static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) +{ + unsigned long psr; + int r; + + local_irq_save(psr); + r = kvm_insert_vmm_mapping(vcpu); + if (r) + goto fail; + r = kvm_vcpu_init(vcpu, vcpu->kvm, id); + if (r) + goto fail; + + r = vti_init_vpd(vcpu); + if (r) { + printk(KERN_DEBUG"kvm: vpd init error!!\n"); + goto uninit; + } + + r = vti_create_vp(vcpu); + if (r) + goto uninit; + + kvm_purge_vmm_mapping(vcpu); + local_irq_restore(psr); + + return 0; +uninit: + kvm_vcpu_uninit(vcpu); +fail: + return r; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, + unsigned int id) +{ + struct kvm_vcpu *vcpu; + unsigned long vm_base = kvm->arch.vm_base; + int r; + int cpu; + + r = -ENOMEM; + if (!vm_base) { + printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); + goto fail; + } + vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); + vcpu->kvm = kvm; + + cpu = get_cpu(); + vti_vcpu_load(vcpu, cpu); + r = vti_vcpu_setup(vcpu, id); + put_cpu(); + + if (r) { + printk(KERN_DEBUG"kvm: vcpu_setup error!!\n"); + goto fail; + } + + return vcpu; +fail: + return ERR_PTR(r); +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) +{ + return -EINVAL; +} + +static void free_kvm(struct kvm *kvm) +{ + unsigned long vm_base = kvm->arch.vm_base; + + if (vm_base) { + memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); + free_pages(vm_base, get_order(KVM_VM_DATA_SIZE)); + } + +} + +static void kvm_release_vm_pages(struct kvm *kvm) +{ + struct kvm_memory_slot *memslot; + int i, j; + unsigned long base_gfn; + + for (i = 0; i < kvm->nmemslots; i++) { + memslot = &kvm->memslots[i]; + base_gfn = memslot->base_gfn; + + for (j = 0; j < memslot->npages; j++) { + if (memslot->rmap[j]) + put_page((struct page *)memslot->rmap[j]); + } + } +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kfree(kvm->arch.vioapic); + kvm_release_vm_pages(kvm); + kvm_free_physmem(kvm); + free_kvm(kvm); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + if (cpu != vcpu->cpu) { + vcpu->cpu = cpu; + if (vcpu->arch.ht_active) + kvm_migrate_hlt_timer(vcpu); + } +} + +#define SAVE_REGS(_x) regs->_x = vcpu->arch._x + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + int r; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + vcpu_load(vcpu); + + for (i = 0; i < 16; i++) { + regs->vpd.vgr[i] = vpd->vgr[i]; + regs->vpd.vbgr[i] = vpd->vbgr[i]; + } + for (i = 0; i < 128; i++) + regs->vpd.vcr[i] = vpd->vcr[i]; + regs->vpd.vhpi = vpd->vhpi; + regs->vpd.vnat = vpd->vnat; + regs->vpd.vbnat = vpd->vbnat; + regs->vpd.vpsr = vpd->vpsr; + regs->vpd.vpr = vpd->vpr; + + r = -EFAULT; + r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, + sizeof(union context)); + if (r) + goto out; + r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET); + if (r) + goto out; + SAVE_REGS(mp_state); + SAVE_REGS(vmm_rr); + memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); + memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS); + SAVE_REGS(itr_regions); + SAVE_REGS(dtr_regions); + SAVE_REGS(tc_regions); + SAVE_REGS(irq_check); + SAVE_REGS(itc_check); + SAVE_REGS(timer_check); + SAVE_REGS(timer_pending); + SAVE_REGS(last_itc); + for (i = 0; i < 8; i++) { + regs->vrr[i] = vcpu->arch.vrr[i]; + regs->ibr[i] = vcpu->arch.ibr[i]; + regs->dbr[i] = vcpu->arch.dbr[i]; + } + for (i = 0; i < 4; i++) + regs->insvc[i] = vcpu->arch.insvc[i]; + regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC); + SAVE_REGS(xtp); + SAVE_REGS(metaphysical_rr0); + SAVE_REGS(metaphysical_rr4); + SAVE_REGS(metaphysical_saved_rr0); + SAVE_REGS(metaphysical_saved_rr4); + SAVE_REGS(fp_psr); + SAVE_REGS(saved_gp); + vcpu_put(vcpu); + r = 0; +out: + return r; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + + hrtimer_cancel(&vcpu->arch.hlt_timer); + kfree(vcpu->arch.apic); +} + + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + unsigned long i; + struct page *page; + int npages = mem->memory_size >> PAGE_SHIFT; + struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; + unsigned long base_gfn = memslot->base_gfn; + + for (i = 0; i < npages; i++) { + page = gfn_to_page(kvm, base_gfn + i); + kvm_set_pmt_entry(kvm, base_gfn + i, + page_to_pfn(page) << PAGE_SHIFT, + _PAGE_AR_RWX|_PAGE_MA_WB); + memslot->rmap[i] = (unsigned long)page; + } + + return 0; +} + + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_uninit(vcpu); +} + +static int vti_cpu_has_kvm_support(void) +{ + long avail = 1, status = 1, control = 1; + long ret; + + ret = ia64_pal_proc_get_features(&avail, &status, &control, 0); + if (ret) + goto out; + + if (!(avail & PAL_PROC_VM_BIT)) + goto out; + + printk(KERN_DEBUG"kvm: Hardware Supports VT\n"); + + ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info); + if (ret) + goto out; + printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size); + + if (!(vp_env_info & VP_OPCODE)) { + printk(KERN_WARNING"kvm: No opcode ability on hardware, " + "vm_env_info:0x%lx\n", vp_env_info); + } + + return 1; +out: + return 0; +} + +static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, + struct module *module) +{ + unsigned long module_base; + unsigned long vmm_size; + + unsigned long vmm_offset, func_offset, fdesc_offset; + struct fdesc *p_fdesc; + + BUG_ON(!module); + + if (!kvm_vmm_base) { + printk("kvm: kvm area hasn't been initilized yet!!\n"); + return -EFAULT; + } + + /*Calculate new position of relocated vmm module.*/ + module_base = (unsigned long)module->module_core; + vmm_size = module->core_size; + if (unlikely(vmm_size > KVM_VMM_SIZE)) + return -EFAULT; + + memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); + kvm_flush_icache(kvm_vmm_base, vmm_size); + + /*Recalculate kvm_vmm_info based on new VMM*/ + vmm_offset = vmm_info->vmm_ivt - module_base; + kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset; + printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n", + kvm_vmm_info->vmm_ivt); + + fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base; + kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE + + fdesc_offset); + func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base; + p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); + p_fdesc->ip = KVM_VMM_BASE + func_offset; + p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base); + + printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n", + KVM_VMM_BASE+func_offset); + + fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base; + kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE + + fdesc_offset); + func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base; + p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); + p_fdesc->ip = KVM_VMM_BASE + func_offset; + p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base); + + kvm_vmm_gp = p_fdesc->gp; + + printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n", + kvm_vmm_info->vmm_entry); + printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n", + KVM_VMM_BASE + func_offset); + + return 0; +} + +int kvm_arch_init(void *opaque) +{ + int r; + struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque; + + if (!vti_cpu_has_kvm_support()) { + printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n"); + r = -EOPNOTSUPP; + goto out; + } + + if (kvm_vmm_info) { + printk(KERN_ERR "kvm: Already loaded VMM module!\n"); + r = -EEXIST; + goto out; + } + + r = -ENOMEM; + kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL); + if (!kvm_vmm_info) + goto out; + + if (kvm_alloc_vmm_area()) + goto out_free0; + + r = kvm_relocate_vmm(vmm_info, vmm_info->module); + if (r) + goto out_free1; + + return 0; + +out_free1: + kvm_free_vmm_area(); +out_free0: + kfree(kvm_vmm_info); +out: + return r; +} + +void kvm_arch_exit(void) +{ + kvm_free_vmm_area(); + kfree(kvm_vmm_info); + kvm_vmm_info = NULL; +} + +static int kvm_ia64_sync_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + struct kvm_memory_slot *memslot; + int r, i; + long n, base; + unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS + + KVM_MEM_DIRTY_LOG_OFS); + + r = -EINVAL; + if (log->slot >= KVM_MEMORY_SLOTS) + goto out; + + memslot = &kvm->memslots[log->slot]; + r = -ENOENT; + if (!memslot->dirty_bitmap) + goto out; + + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + base = memslot->base_gfn / BITS_PER_LONG; + + for (i = 0; i < n/sizeof(long); ++i) { + memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; + dirty_bitmap[base + i] = 0; + } + r = 0; +out: + return r; +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + int r; + int n; + struct kvm_memory_slot *memslot; + int is_dirty = 0; + + spin_lock(&kvm->arch.dirty_log_lock); + + r = kvm_ia64_sync_dirty_log(kvm, log); + if (r) + goto out; + + r = kvm_get_dirty_log(kvm, log, &is_dirty); + if (r) + goto out; + + /* If nothing is dirty, don't bother messing with page tables. */ + if (is_dirty) { + kvm_flush_remote_tlbs(kvm); + memslot = &kvm->memslots[log->slot]; + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + memset(memslot->dirty_bitmap, 0, n); + } + r = 0; +out: + spin_unlock(&kvm->arch.dirty_log_lock); + return r; +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +static void vcpu_kick_intr(void *info) +{ +#ifdef DEBUG + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; + printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu); +#endif +} + +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int ipi_pcpu = vcpu->cpu; + + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + + if (vcpu->guest_mode) + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); +} + +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) +{ + + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (!test_and_set_bit(vec, &vpd->irr[0])) { + vcpu->arch.irq_new_pending = 1; + if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) + kvm_vcpu_kick(vcpu); + else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + } + return 1; + } + return 0; +} + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) +{ + return apic->vcpu->vcpu_id == dest; +} + +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) +{ + return 0; +} + +struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, + unsigned long bitmap) +{ + struct kvm_vcpu *lvcpu = kvm->vcpus[0]; + int i; + + for (i = 1; i < KVM_MAX_VCPUS; i++) { + if (!kvm->vcpus[i]) + continue; + if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp) + lvcpu = kvm->vcpus[i]; + } + + return lvcpu; +} + +static int find_highest_bits(int *dat) +{ + u32 bits, bitnum; + int i; + + /* loop for all 256 bits */ + for (i = 7; i >= 0 ; i--) { + bits = dat[i]; + if (bits) { + bitnum = fls(bits); + return i * 32 + bitnum - 1; + } + } + + return -1; +} + +int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) +{ + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (vpd->irr[0] & (1UL << NMI_VECTOR)) + return NMI_VECTOR; + if (vpd->irr[0] & (1UL << ExtINT_VECTOR)) + return ExtINT_VECTOR; + + return find_highest_bits((int *)&vpd->irr[0]); +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +{ + if (kvm_highest_pending_irq(vcpu) != -1) + return 1; + return 0; +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return 0; +} + +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +{ + return gfn; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c new file mode 100644 index 00000000000..091f936c448 --- /dev/null +++ b/arch/ia64/kvm/kvm_fw.c @@ -0,0 +1,500 @@ +/* + * PAL/SAL call delegation + * + * Copyright (c) 2004 Li Susie <susie.li@intel.com> + * Copyright (c) 2005 Yu Ke <ke.yu@intel.com> + * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <linux/kvm_host.h> +#include <linux/smp.h> + +#include "vti.h" +#include "misc.h" + +#include <asm/pal.h> +#include <asm/sal.h> +#include <asm/tlb.h> + +/* + * Handy macros to make sure that the PAL return values start out + * as something meaningful. + */ +#define INIT_PAL_STATUS_UNIMPLEMENTED(x) \ + { \ + x.status = PAL_STATUS_UNIMPLEMENTED; \ + x.v0 = 0; \ + x.v1 = 0; \ + x.v2 = 0; \ + } + +#define INIT_PAL_STATUS_SUCCESS(x) \ + { \ + x.status = PAL_STATUS_SUCCESS; \ + x.v0 = 0; \ + x.v1 = 0; \ + x.v2 = 0; \ + } + +static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu, + u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) { + struct exit_ctl_data *p; + + if (vcpu) { + p = &vcpu->arch.exit_data; + if (p->exit_reason == EXIT_REASON_PAL_CALL) { + *gr28 = p->u.pal_data.gr28; + *gr29 = p->u.pal_data.gr29; + *gr30 = p->u.pal_data.gr30; + *gr31 = p->u.pal_data.gr31; + return ; + } + } + printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n"); +} + +static void set_pal_result(struct kvm_vcpu *vcpu, + struct ia64_pal_retval result) { + + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && p->exit_reason == EXIT_REASON_PAL_CALL) { + p->u.pal_data.ret = result; + return ; + } + INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret); +} + +static void set_sal_result(struct kvm_vcpu *vcpu, + struct sal_ret_values result) { + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && p->exit_reason == EXIT_REASON_SAL_CALL) { + p->u.sal_data.ret = result; + return ; + } + printk(KERN_WARNING"Failed to set sal result!!\n"); +} + +struct cache_flush_args { + u64 cache_type; + u64 operation; + u64 progress; + long status; +}; + +cpumask_t cpu_cache_coherent_map; + +static void remote_pal_cache_flush(void *data) +{ + struct cache_flush_args *args = data; + long status; + u64 progress = args->progress; + + status = ia64_pal_cache_flush(args->cache_type, args->operation, + &progress, NULL); + if (status != 0) + args->status = status; +} + +static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu) +{ + u64 gr28, gr29, gr30, gr31; + struct ia64_pal_retval result = {0, 0, 0, 0}; + struct cache_flush_args args = {0, 0, 0, 0}; + long psr; + + gr28 = gr29 = gr30 = gr31 = 0; + kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31); + + if (gr31 != 0) + printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu); + + /* Always call Host Pal in int=1 */ + gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS; + args.cache_type = gr29; + args.operation = gr30; + smp_call_function(remote_pal_cache_flush, + (void *)&args, 1, 1); + if (args.status != 0) + printk(KERN_ERR"pal_cache_flush error!," + "status:0x%lx\n", args.status); + /* + * Call Host PAL cache flush + * Clear psr.ic when call PAL_CACHE_FLUSH + */ + local_irq_save(psr); + result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1, + &result.v0); + local_irq_restore(psr); + if (result.status != 0) + printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld" + "in1:%lx,in2:%lx\n", + vcpu, result.status, gr29, gr30); + +#if 0 + if (gr29 == PAL_CACHE_TYPE_COHERENT) { + cpus_setall(vcpu->arch.cache_coherent_map); + cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map); + cpus_setall(cpu_cache_coherent_map); + cpu_clear(vcpu->cpu, cpu_cache_coherent_map); + } +#endif + return result; +} + +struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0); + return result; +} + +static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0); + + /* + * PAL_FREQ_BASE may not be implemented in some platforms, + * call SAL instead. + */ + if (result.v0 == 0) { + result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, + &result.v0, + &result.v1); + result.v2 = 0; + } + + return result; +} + +static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); + return result; +} + +static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu) +{ + struct ia64_pal_retval result; + + INIT_PAL_STATUS_UNIMPLEMENTED(result); + return result; +} + +static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + INIT_PAL_STATUS_SUCCESS(result); + return result; +} + +static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result = {0, 0, 0, 0}; + long in0, in1, in2, in3; + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + result.status = ia64_pal_proc_get_features(&result.v0, &result.v1, + &result.v2, in2); + + return result; +} + +static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu) +{ + + pal_cache_config_info_t ci; + long status; + unsigned long in0, in1, in2, in3, r9, r10; + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + status = ia64_pal_cache_config_info(in1, in2, &ci); + r9 = ci.pcci_info_1.pcci1_data; + r10 = ci.pcci_info_2.pcci2_data; + return ((struct ia64_pal_retval){status, r9, r10, 0}); +} + +#define GUEST_IMPL_VA_MSB 59 +#define GUEST_RID_BITS 18 + +static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu) +{ + + pal_vm_info_1_u_t vminfo1; + pal_vm_info_2_u_t vminfo2; + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0); + if (!result.status) { + vminfo1.pvi1_val = result.v0; + vminfo1.pal_vm_info_1_s.max_itr_entry = 8; + vminfo1.pal_vm_info_1_s.max_dtr_entry = 8; + result.v0 = vminfo1.pvi1_val; + vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB; + vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS; + result.v1 = vminfo2.pvi2_val; + } + + return result; +} + +static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu) +{ + struct ia64_pal_retval result; + + INIT_PAL_STATUS_UNIMPLEMENTED(result); + + return result; +} + +static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu) +{ + u64 index = 0; + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && (p->exit_reason == EXIT_REASON_PAL_CALL)) + index = p->u.pal_data.gr28; + + return index; +} + +int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + + u64 gr28; + struct ia64_pal_retval result; + int ret = 1; + + gr28 = kvm_get_pal_call_index(vcpu); + /*printk("pal_call index:%lx\n",gr28);*/ + switch (gr28) { + case PAL_CACHE_FLUSH: + result = pal_cache_flush(vcpu); + break; + case PAL_CACHE_SUMMARY: + result = pal_cache_summary(vcpu); + break; + case PAL_HALT_LIGHT: + { + vcpu->arch.timer_pending = 1; + INIT_PAL_STATUS_SUCCESS(result); + if (kvm_highest_pending_irq(vcpu) == -1) + ret = kvm_emulate_halt(vcpu); + + } + break; + + case PAL_FREQ_RATIOS: + result = pal_freq_ratios(vcpu); + break; + + case PAL_FREQ_BASE: + result = pal_freq_base(vcpu); + break; + + case PAL_LOGICAL_TO_PHYSICAL : + result = pal_logical_to_physica(vcpu); + break; + + case PAL_VM_SUMMARY : + result = pal_vm_summary(vcpu); + break; + + case PAL_VM_INFO : + result = pal_vm_info(vcpu); + break; + case PAL_PLATFORM_ADDR : + result = pal_platform_addr(vcpu); + break; + case PAL_CACHE_INFO: + result = pal_cache_info(vcpu); + break; + case PAL_PTCE_INFO: + INIT_PAL_STATUS_SUCCESS(result); + result.v1 = (1L << 32) | 1L; + break; + case PAL_VM_PAGE_SIZE: + result.status = ia64_pal_vm_page_size(&result.v0, + &result.v1); + break; + case PAL_RSE_INFO: + result.status = ia64_pal_rse_info(&result.v0, + (pal_hints_u_t *)&result.v1); + break; + case PAL_PROC_GET_FEATURES: + result = pal_proc_get_features(vcpu); + break; + case PAL_DEBUG_INFO: + result.status = ia64_pal_debug_info(&result.v0, + &result.v1); + break; + case PAL_VERSION: + result.status = ia64_pal_version( + (pal_version_u_t *)&result.v0, + (pal_version_u_t *)&result.v1); + + break; + case PAL_FIXED_ADDR: + result.status = PAL_STATUS_SUCCESS; + result.v0 = vcpu->vcpu_id; + break; + default: + INIT_PAL_STATUS_UNIMPLEMENTED(result); + printk(KERN_WARNING"kvm: Unsupported pal call," + " index:0x%lx\n", gr28); + } + set_pal_result(vcpu, result); + return ret; +} + +static struct sal_ret_values sal_emulator(struct kvm *kvm, + long index, unsigned long in1, + unsigned long in2, unsigned long in3, + unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + unsigned long r9 = 0; + unsigned long r10 = 0; + long r11 = 0; + long status; + + status = 0; + switch (index) { + case SAL_FREQ_BASE: + status = ia64_sal_freq_base(in1, &r9, &r10); + break; + case SAL_PCI_CONFIG_READ: + printk(KERN_WARNING"kvm: Not allowed to call here!" + " SAL_PCI_CONFIG_READ\n"); + break; + case SAL_PCI_CONFIG_WRITE: + printk(KERN_WARNING"kvm: Not allowed to call here!" + " SAL_PCI_CONFIG_WRITE\n"); + break; + case SAL_SET_VECTORS: + if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) { + if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) { + status = -2; + } else { + kvm->arch.rdv_sal_data.boot_ip = in2; + kvm->arch.rdv_sal_data.boot_gp = in3; + } + printk("Rendvous called! iip:%lx\n\n", in2); + } else + printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu." + "ignored...\n", in1); + break; + case SAL_GET_STATE_INFO: + /* No more info. */ + status = -5; + r9 = 0; + break; + case SAL_GET_STATE_INFO_SIZE: + /* Return a dummy size. */ + status = 0; + r9 = 128; + break; + case SAL_CLEAR_STATE_INFO: + /* Noop. */ + break; + case SAL_MC_RENDEZ: + printk(KERN_WARNING + "kvm: called SAL_MC_RENDEZ. ignored...\n"); + break; + case SAL_MC_SET_PARAMS: + printk(KERN_WARNING + "kvm: called SAL_MC_SET_PARAMS.ignored!\n"); + break; + case SAL_CACHE_FLUSH: + if (1) { + /*Flush using SAL. + This method is faster but has a side + effect on other vcpu running on + this cpu. */ + status = ia64_sal_cache_flush(in1); + } else { + /*Maybe need to implement the method + without side effect!*/ + status = 0; + } + break; + case SAL_CACHE_INIT: + printk(KERN_WARNING + "kvm: called SAL_CACHE_INIT. ignored...\n"); + break; + case SAL_UPDATE_PAL: + printk(KERN_WARNING + "kvm: CALLED SAL_UPDATE_PAL. ignored...\n"); + break; + default: + printk(KERN_WARNING"kvm: called SAL_CALL with unknown index." + " index:%ld\n", index); + status = -1; + break; + } + return ((struct sal_ret_values) {status, r9, r10, r11}); +} + +static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1, + u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){ + + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p) { + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + *in0 = p->u.sal_data.in0; + *in1 = p->u.sal_data.in1; + *in2 = p->u.sal_data.in2; + *in3 = p->u.sal_data.in3; + *in4 = p->u.sal_data.in4; + *in5 = p->u.sal_data.in5; + *in6 = p->u.sal_data.in6; + *in7 = p->u.sal_data.in7; + return ; + } + } + *in0 = 0; +} + +void kvm_sal_emul(struct kvm_vcpu *vcpu) +{ + + struct sal_ret_values result; + u64 index, in1, in2, in3, in4, in5, in6, in7; + + kvm_get_sal_call_data(vcpu, &index, &in1, &in2, + &in3, &in4, &in5, &in6, &in7); + result = sal_emulator(vcpu->kvm, index, in1, in2, in3, + in4, in5, in6, in7); + set_sal_result(vcpu, result); +} diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h new file mode 100644 index 00000000000..13980d9b8bc --- /dev/null +++ b/arch/ia64/kvm/kvm_minstate.h @@ -0,0 +1,273 @@ +/* + * kvm_minstate.h: min save macros + * Copyright (c) 2007, Intel Corporation. + * + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + + +#include <asm/asmmacro.h> +#include <asm/types.h> +#include <asm/kregs.h> +#include "asm-offsets.h" + +#define KVM_MINSTATE_START_SAVE_MIN \ + mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\ + ;; \ + mov.m r28 = ar.rnat; \ + addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ + ;; \ + lfetch.fault.excl.nt1 [r22]; \ + addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ + mov r23 = ar.bspstore; /* save ar.bspstore */ \ + ;; \ + mov ar.bspstore = r22; /* switch to kernel RBS */\ + ;; \ + mov r18 = ar.bsp; \ + mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ + + + +#define KVM_MINSTATE_END_SAVE_MIN \ + bsw.1; /* switch back to bank 1 (must be last in insn group) */\ + ;; + + +#define PAL_VSA_SYNC_READ \ + /* begin to call pal vps sync_read */ \ + add r25 = VMM_VPD_BASE_OFFSET, r21; \ + adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21; /* entry point */ \ + ;; \ + ld8 r25 = [r25]; /* read vpd base */ \ + ld8 r20 = [r20]; \ + ;; \ + add r20 = PAL_VPS_SYNC_READ,r20; \ + ;; \ +{ .mii; \ + nop 0x0; \ + mov r24 = ip; \ + mov b0 = r20; \ + ;; \ +}; \ +{ .mmb; \ + add r24 = 0x20, r24; \ + nop 0x0; \ + br.cond.sptk b0; /* call the service */ \ + ;; \ +}; + + + +#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21 + +/* + * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. + * + * Assumed state upon entry: + * psr.ic: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * r2 = points to &pt_regs.r16 + * r8 = contents of ar.ccv + * r9 = contents of ar.csd + * r10 = contents of ar.ssd + * r11 = FPSR_DEFAULT + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. + */ + + +#define PT(f) (VMM_PT_REGS_##f##_OFFSET) + +#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ + KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ + mov r27 = ar.rsc; /* M */ \ + mov r20 = r1; /* A */ \ + mov r25 = ar.unat; /* M */ \ + mov r29 = cr.ipsr; /* M */ \ + mov r26 = ar.pfs; /* I */ \ + mov r18 = cr.isr; \ + COVER; /* B;; (or nothing) */ \ + ;; \ + tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \ + mov r1 = r16; \ +/* mov r21=r16; */ \ + /* switch from user to kernel RBS: */ \ + ;; \ + invala; /* M */ \ + SAVE_IFS; \ + ;; \ + KVM_MINSTATE_START_SAVE_MIN \ + adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \ + adds r16 = PT(CR_IPSR),r1; \ + ;; \ + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ + st8 [r16] = r29; /* save cr.ipsr */ \ + ;; \ + lfetch.fault.excl.nt1 [r17]; \ + tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \ + mov r29 = b0 \ + ;; \ + adds r16 = PT(R8),r1; /* initialize first base pointer */\ + adds r17 = PT(R9),r1; /* initialize second base pointer */\ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r8,16; \ +.mem.offset 8,0; st8.spill [r17] = r9,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r10,24; \ +.mem.offset 8,0; st8.spill [r17] = r11,24; \ + ;; \ + mov r9 = cr.iip; /* M */ \ + mov r10 = ar.fpsr; /* M */ \ + ;; \ + st8 [r16] = r9,16; /* save cr.iip */ \ + st8 [r17] = r30,16; /* save cr.ifs */ \ + sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \ + ;; \ + st8 [r16] = r25,16; /* save ar.unat */ \ + st8 [r17] = r26,16; /* save ar.pfs */ \ + shl r18 = r18,16; /* calu ar.rsc used for "loadrs" */\ + ;; \ + st8 [r16] = r27,16; /* save ar.rsc */ \ + st8 [r17] = r28,16; /* save ar.rnat */ \ + ;; /* avoid RAW on r16 & r17 */ \ + st8 [r16] = r23,16; /* save ar.bspstore */ \ + st8 [r17] = r31,16; /* save predicates */ \ + ;; \ + st8 [r16] = r29,16; /* save b0 */ \ + st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \ +.mem.offset 8,0; st8.spill [r17] = r12,16; \ + adds r12 = -16,r1; /* switch to kernel memory stack */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r13,16; \ +.mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\ + mov r13 = r21; /* establish `current' */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r15,16; \ +.mem.offset 8,0; st8.spill [r17] = r14,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r2,16; \ +.mem.offset 8,0; st8.spill [r17] = r3,16; \ + adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \ + ;; \ + adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \ + adds r17 = VMM_VCPU_ISR_OFFSET,r13; \ + mov r26 = cr.iipa; \ + mov r27 = cr.isr; \ + ;; \ + st8 [r16] = r26; \ + st8 [r17] = r27; \ + ;; \ + EXTRA; \ + mov r8 = ar.ccv; \ + mov r9 = ar.csd; \ + mov r10 = ar.ssd; \ + movl r11 = FPSR_DEFAULT; /* L-unit */ \ + adds r17 = VMM_VCPU_GP_OFFSET,r13; \ + ;; \ + ld8 r1 = [r17];/* establish kernel global pointer */ \ + ;; \ + PAL_VSA_SYNC_READ \ + KVM_MINSTATE_END_SAVE_MIN + +/* + * SAVE_REST saves the remainder of pt_regs (with psr.ic on). + * + * Assumed state upon entry: + * psr.ic: on + * r2: points to &pt_regs.f6 + * r3: points to &pt_regs.f7 + * r8: contents of ar.ccv + * r9: contents of ar.csd + * r10: contents of ar.ssd + * r11: FPSR_DEFAULT + * + * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. + */ +#define KVM_SAVE_REST \ +.mem.offset 0,0; st8.spill [r2] = r16,16; \ +.mem.offset 8,0; st8.spill [r3] = r17,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r18,16; \ +.mem.offset 8,0; st8.spill [r3] = r19,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r20,16; \ +.mem.offset 8,0; st8.spill [r3] = r21,16; \ + mov r18=b6; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r22,16; \ +.mem.offset 8,0; st8.spill [r3] = r23,16; \ + mov r19 = b7; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r24,16; \ +.mem.offset 8,0; st8.spill [r3] = r25,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r26,16; \ +.mem.offset 8,0; st8.spill [r3] = r27,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r28,16; \ +.mem.offset 8,0; st8.spill [r3] = r29,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r30,16; \ +.mem.offset 8,0; st8.spill [r3] = r31,32; \ + ;; \ + mov ar.fpsr = r11; \ + st8 [r2] = r8,8; \ + adds r24 = PT(B6)-PT(F7),r3; \ + adds r25 = PT(B7)-PT(F7),r3; \ + ;; \ + st8 [r24] = r18,16; /* b6 */ \ + st8 [r25] = r19,16; /* b7 */ \ + adds r2 = PT(R4)-PT(F6),r2; \ + adds r3 = PT(R5)-PT(F7),r3; \ + ;; \ + st8 [r24] = r9; /* ar.csd */ \ + st8 [r25] = r10; /* ar.ssd */ \ + ;; \ + mov r18 = ar.unat; \ + adds r19 = PT(EML_UNAT)-PT(R4),r2; \ + ;; \ + st8 [r19] = r18; /* eml_unat */ \ + + +#define KVM_SAVE_EXTRA \ +.mem.offset 0,0; st8.spill [r2] = r4,16; \ +.mem.offset 8,0; st8.spill [r3] = r5,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r6,16; \ +.mem.offset 8,0; st8.spill [r3] = r7; \ + ;; \ + mov r26 = ar.unat; \ + ;; \ + st8 [r2] = r26;/* eml_unat */ \ + +#define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,) +#define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19) +#define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, ) diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h new file mode 100644 index 00000000000..6d6cbcb1489 --- /dev/null +++ b/arch/ia64/kvm/lapic.h @@ -0,0 +1,25 @@ +#ifndef __KVM_IA64_LAPIC_H +#define __KVM_IA64_LAPIC_H + +#include <linux/kvm_host.h> + +/* + * vlsapic + */ +struct kvm_lapic{ + struct kvm_vcpu *vcpu; + uint64_t insvc[4]; + uint64_t vhpi; + uint8_t xtp; + uint8_t pal_init_pending; + uint8_t pad[2]; +}; + +int kvm_create_lapic(struct kvm_vcpu *vcpu); +void kvm_free_lapic(struct kvm_vcpu *vcpu); + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); + +#endif diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h new file mode 100644 index 00000000000..e585c460734 --- /dev/null +++ b/arch/ia64/kvm/misc.h @@ -0,0 +1,93 @@ +#ifndef __KVM_IA64_MISC_H +#define __KVM_IA64_MISC_H + +#include <linux/kvm_host.h> +/* + * misc.h + * Copyright (C) 2007, Intel Corporation. + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +/* + *Return p2m base address at host side! + */ +static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) +{ + return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS); +} + +static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, + u64 paddr, u64 mem_flags) +{ + uint64_t *pmt_base = kvm_host_get_pmt(kvm); + unsigned long pte; + + pte = PAGE_ALIGN(paddr) | mem_flags; + pmt_base[gfn] = pte; +} + +/*Function for translating host address to guest address*/ + +static inline void *to_guest(struct kvm *kvm, void *addr) +{ + return (void *)((unsigned long)(addr) - kvm->arch.vm_base + + KVM_VM_DATA_BASE); +} + +/*Function for translating guest address to host address*/ + +static inline void *to_host(struct kvm *kvm, void *addr) +{ + return (void *)((unsigned long)addr - KVM_VM_DATA_BASE + + kvm->arch.vm_base); +} + +/* Get host context of the vcpu */ +static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu) +{ + union context *ctx = &vcpu->arch.host; + return to_guest(vcpu->kvm, ctx); +} + +/* Get guest context of the vcpu */ +static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu) +{ + union context *ctx = &vcpu->arch.guest; + return to_guest(vcpu->kvm, ctx); +} + +/* kvm get exit data from gvmm! */ +static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.exit_data; +} + +/*kvm get vcpu ioreq for kvm module!*/ +static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p_ctl_data; + + if (vcpu) { + p_ctl_data = kvm_get_exit_data(vcpu); + if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION) + return &p_ctl_data->u.ioreq; + } + + return NULL; +} + +#endif diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c new file mode 100644 index 00000000000..351bf70da46 --- /dev/null +++ b/arch/ia64/kvm/mmio.c @@ -0,0 +1,341 @@ +/* + * mmio.c: MMIO emulation components. + * Copyright (c) 2004, Intel Corporation. + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * Kun Tian (Kevin Tian) (Kevin.tian@intel.com) + * + * Copyright (c) 2007 Intel Corporation KVM support. + * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/kvm_host.h> + +#include "vcpu.h" + +static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val) +{ + VLSAPIC_XTP(v) = val; +} + +/* + * LSAPIC OFFSET + */ +#define PIB_LOW_HALF(ofst) !(ofst & (1 << 20)) +#define PIB_OFST_INTA 0x1E0000 +#define PIB_OFST_XTP 0x1E0008 + +/* + * execute write IPI op. + */ +static void vlsapic_write_ipi(struct kvm_vcpu *vcpu, + uint64_t addr, uint64_t data) +{ + struct exit_ctl_data *p = ¤t_vcpu->arch.exit_data; + unsigned long psr; + + local_irq_save(psr); + + p->exit_reason = EXIT_REASON_IPI; + p->u.ipi_data.addr.val = addr; + p->u.ipi_data.data.val = data; + vmm_transition(current_vcpu); + + local_irq_restore(psr); + +} + +void lsapic_write(struct kvm_vcpu *v, unsigned long addr, + unsigned long length, unsigned long val) +{ + addr &= (PIB_SIZE - 1); + + switch (addr) { + case PIB_OFST_INTA: + /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/ + panic_vm(v); + break; + case PIB_OFST_XTP: + if (length == 1) { + vlsapic_write_xtp(v, val); + } else { + /*panic_domain(NULL, + "Undefined write on PIB XTP\n");*/ + panic_vm(v); + } + break; + default: + if (PIB_LOW_HALF(addr)) { + /*lower half */ + if (length != 8) + /*panic_domain(NULL, + "Can't LHF write with size %ld!\n", + length);*/ + panic_vm(v); + else + vlsapic_write_ipi(v, addr, val); + } else { /* upper half + printk("IPI-UHF write %lx\n",addr);*/ + panic_vm(v); + } + break; + } +} + +unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr, + unsigned long length) +{ + uint64_t result = 0; + + addr &= (PIB_SIZE - 1); + + switch (addr) { + case PIB_OFST_INTA: + if (length == 1) /* 1 byte load */ + ; /* There is no i8259, there is no INTA access*/ + else + /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */ + panic_vm(v); + + break; + case PIB_OFST_XTP: + if (length == 1) { + result = VLSAPIC_XTP(v); + /* printk("read xtp %lx\n", result); */ + } else { + /*panic_domain(NULL, + "Undefined read on PIB XTP\n");*/ + panic_vm(v); + } + break; + default: + panic_vm(v); + break; + } + return result; +} + +static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest, + u16 s, int ma, int dir) +{ + unsigned long iot; + struct exit_ctl_data *p = &vcpu->arch.exit_data; + unsigned long psr; + + iot = __gpfn_is_io(src_pa >> PAGE_SHIFT); + + local_irq_save(psr); + + /*Intercept the acces for PIB range*/ + if (iot == GPFN_PIB) { + if (!dir) + lsapic_write(vcpu, src_pa, s, *dest); + else + *dest = lsapic_read(vcpu, src_pa, s); + goto out; + } + p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION; + p->u.ioreq.addr = src_pa; + p->u.ioreq.size = s; + p->u.ioreq.dir = dir; + if (dir == IOREQ_WRITE) + p->u.ioreq.data = *dest; + p->u.ioreq.state = STATE_IOREQ_READY; + vmm_transition(vcpu); + + if (p->u.ioreq.state == STATE_IORESP_READY) { + if (dir == IOREQ_READ) + *dest = p->u.ioreq.data; + } else + panic_vm(vcpu); +out: + local_irq_restore(psr); + return ; +} + +/* + dir 1: read 0:write + inst_type 0:integer 1:floating point + */ +#define SL_INTEGER 0 /* store/load interger*/ +#define SL_FLOATING 1 /* store/load floating*/ + +void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) +{ + struct kvm_pt_regs *regs; + IA64_BUNDLE bundle; + int slot, dir = 0; + int inst_type = -1; + u16 size = 0; + u64 data, slot1a, slot1b, temp, update_reg; + s32 imm; + INST64 inst; + + regs = vcpu_regs(vcpu); + + if (fetch_code(vcpu, regs->cr_iip, &bundle)) { + /* if fetch code fail, return and try again */ + return; + } + slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; + if (!slot) + inst.inst = bundle.slot0; + else if (slot == 1) { + slot1a = bundle.slot1a; + slot1b = bundle.slot1b; + inst.inst = slot1a + (slot1b << 18); + } else if (slot == 2) + inst.inst = bundle.slot2; + + /* Integer Load/Store */ + if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) { + inst_type = SL_INTEGER; + size = (inst.M1.x6 & 0x3); + if ((inst.M1.x6 >> 2) > 0xb) { + /*write*/ + dir = IOREQ_WRITE; + data = vcpu_get_gr(vcpu, inst.M4.r2); + } else if ((inst.M1.x6 >> 2) < 0xb) { + /*read*/ + dir = IOREQ_READ; + } + } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) { + /* Integer Load + Reg update */ + inst_type = SL_INTEGER; + dir = IOREQ_READ; + size = (inst.M2.x6 & 0x3); + temp = vcpu_get_gr(vcpu, inst.M2.r3); + update_reg = vcpu_get_gr(vcpu, inst.M2.r2); + temp += update_reg; + vcpu_set_gr(vcpu, inst.M2.r3, temp, 0); + } else if (inst.M3.major == 5) { + /*Integer Load/Store + Imm update*/ + inst_type = SL_INTEGER; + size = (inst.M3.x6&0x3); + if ((inst.M5.x6 >> 2) > 0xb) { + /*write*/ + dir = IOREQ_WRITE; + data = vcpu_get_gr(vcpu, inst.M5.r2); + temp = vcpu_get_gr(vcpu, inst.M5.r3); + imm = (inst.M5.s << 31) | (inst.M5.i << 30) | + (inst.M5.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M5.r3, temp, 0); + + } else if ((inst.M3.x6 >> 2) < 0xb) { + /*read*/ + dir = IOREQ_READ; + temp = vcpu_get_gr(vcpu, inst.M3.r3); + imm = (inst.M3.s << 31) | (inst.M3.i << 30) | + (inst.M3.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M3.r3, temp, 0); + + } + } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B + && inst.M9.m == 0 && inst.M9.x == 0) { + /* Floating-point spill*/ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + vcpu_get_fpreg(vcpu, inst.M9.f2, &v); + /* Write high word. FIXME: this is a kludge! */ + v.u.bits[1] &= 0x3ffff; + mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); + data = v.u.bits[0]; + size = 3; + } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) { + /* Floating-point spill + Imm update */ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + vcpu_get_fpreg(vcpu, inst.M10.f2, &v); + temp = vcpu_get_gr(vcpu, inst.M10.r3); + imm = (inst.M10.s << 31) | (inst.M10.i << 30) | + (inst.M10.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); + + /* Write high word.FIXME: this is a kludge! */ + v.u.bits[1] &= 0x3ffff; + mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); + data = v.u.bits[0]; + size = 3; + } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) { + /* Floating-point stf8 + Imm update */ + struct ia64_fpreg v; + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + size = 3; + vcpu_get_fpreg(vcpu, inst.M10.f2, &v); + data = v.u.bits[0]; /* Significand. */ + temp = vcpu_get_gr(vcpu, inst.M10.r3); + imm = (inst.M10.s << 31) | (inst.M10.i << 30) | + (inst.M10.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); + } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c + && inst.M15.x6 <= 0x2f) { + temp = vcpu_get_gr(vcpu, inst.M15.r3); + imm = (inst.M15.s << 31) | (inst.M15.i << 30) | + (inst.M15.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M15.r3, temp, 0); + + vcpu_increment_iip(vcpu); + return; + } else if (inst.M12.major == 6 && inst.M12.m == 1 + && inst.M12.x == 1 && inst.M12.x6 == 1) { + /* Floating-point Load Pair + Imm ldfp8 M12*/ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_READ; + size = 8; /*ldfd*/ + mmio_access(vcpu, padr, &data, size, ma, dir); + v.u.bits[0] = data; + v.u.bits[1] = 0x1003E; + vcpu_set_fpreg(vcpu, inst.M12.f1, &v); + padr += 8; + mmio_access(vcpu, padr, &data, size, ma, dir); + v.u.bits[0] = data; + v.u.bits[1] = 0x1003E; + vcpu_set_fpreg(vcpu, inst.M12.f2, &v); + padr += 8; + vcpu_set_gr(vcpu, inst.M12.r3, padr, 0); + vcpu_increment_iip(vcpu); + return; + } else { + inst_type = -1; + panic_vm(vcpu); + } + + size = 1 << size; + if (dir == IOREQ_WRITE) { + mmio_access(vcpu, padr, &data, size, ma, dir); + } else { + mmio_access(vcpu, padr, &data, size, ma, dir); + if (inst_type == SL_INTEGER) + vcpu_set_gr(vcpu, inst.M1.r1, data, 0); + else + panic_vm(vcpu); + + } + vcpu_increment_iip(vcpu); +} diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S new file mode 100644 index 00000000000..e4f15d641b2 --- /dev/null +++ b/arch/ia64/kvm/optvfault.S @@ -0,0 +1,918 @@ +/* + * arch/ia64/vmx/optvfault.S + * optimize virtualization fault handler + * + * Copyright (C) 2006 Intel Co + * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> + */ + +#include <asm/asmmacro.h> +#include <asm/processor.h> + +#include "vti.h" +#include "asm-offsets.h" + +#define ACCE_MOV_FROM_AR +#define ACCE_MOV_FROM_RR +#define ACCE_MOV_TO_RR +#define ACCE_RSM +#define ACCE_SSM +#define ACCE_MOV_TO_PSR +#define ACCE_THASH + +//mov r1=ar3 +GLOBAL_ENTRY(kvm_asm_mov_from_ar) +#ifndef ACCE_MOV_FROM_AR + br.many kvm_virtualization_fault_back +#endif + add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 + add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 + extr.u r17=r25,6,7 + ;; + ld8 r18=[r18] + mov r19=ar.itc + mov r24=b0 + ;; + add r19=r19,r18 + addl r20=@gprel(asm_mov_to_reg),gp + ;; + st8 [r16] = r19 + adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 + shladd r17=r17,4,r20 + ;; + mov b0=r17 + br.sptk.few b0 + ;; +END(kvm_asm_mov_from_ar) + + +// mov r1=rr[r3] +GLOBAL_ENTRY(kvm_asm_mov_from_rr) +#ifndef ACCE_MOV_FROM_RR + br.many kvm_virtualization_fault_back +#endif + extr.u r16=r25,20,7 + extr.u r17=r25,6,7 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20 + shladd r16=r16,4,r20 + mov r24=b0 + ;; + add r27=VMM_VCPU_VRR0_OFFSET,r21 + mov b0=r16 + br.many b0 + ;; +kvm_asm_mov_from_rr_back_1: + adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 + adds r22=asm_mov_to_reg-asm_mov_from_reg,r20 + shr.u r26=r19,61 + ;; + shladd r17=r17,4,r22 + shladd r27=r26,3,r27 + ;; + ld8 r19=[r27] + mov b0=r17 + br.many b0 +END(kvm_asm_mov_from_rr) + + +// mov rr[r3]=r2 +GLOBAL_ENTRY(kvm_asm_mov_to_rr) +#ifndef ACCE_MOV_TO_RR + br.many kvm_virtualization_fault_back +#endif + extr.u r16=r25,20,7 + extr.u r17=r25,13,7 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20 + shladd r16=r16,4,r20 + mov r22=b0 + ;; + add r27=VMM_VCPU_VRR0_OFFSET,r21 + mov b0=r16 + br.many b0 + ;; +kvm_asm_mov_to_rr_back_1: + adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20 + shr.u r23=r19,61 + shladd r17=r17,4,r20 + ;; + //if rr6, go back + cmp.eq p6,p0=6,r23 + mov b0=r22 + (p6) br.cond.dpnt.many kvm_virtualization_fault_back + ;; + mov r28=r19 + mov b0=r17 + br.many b0 +kvm_asm_mov_to_rr_back_2: + adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 + shladd r27=r23,3,r27 + ;; // vrr.rid<<4 |0xe + st8 [r27]=r19 + mov b0=r30 + ;; + extr.u r16=r19,8,26 + extr.u r18 =r19,2,6 + mov r17 =0xe + ;; + shladd r16 = r16, 4, r17 + extr.u r19 =r19,0,8 + ;; + shl r16 = r16,8 + ;; + add r19 = r19, r16 + ;; //set ve 1 + dep r19=-1,r19,0,1 + cmp.lt p6,p0=14,r18 + ;; + (p6) mov r18=14 + ;; + (p6) dep r19=r18,r19,2,6 + ;; + cmp.eq p6,p0=0,r23 + ;; + cmp.eq.or p6,p0=4,r23 + ;; + adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + ;; + ld4 r16=[r16] + cmp.eq p7,p0=r0,r0 + (p6) shladd r17=r23,1,r17 + ;; + (p6) st8 [r17]=r19 + (p6) tbit.nz p6,p7=r16,0 + ;; + (p7) mov rr[r28]=r19 + mov r24=r22 + br.many b0 +END(kvm_asm_mov_to_rr) + + +//rsm +GLOBAL_ENTRY(kvm_asm_rsm) +#ifndef ACCE_RSM + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,6,21 + extr.u r27=r25,31,2 + ;; + ld8 r16=[r16] + extr.u r28=r25,36,1 + dep r26=r27,r26,21,2 + ;; + add r17=VPD_VPSR_START_OFFSET,r16 + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + //r26 is imm24 + dep r26=r28,r26,23,1 + ;; + ld8 r18=[r17] + movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI + ld4 r23=[r22] + sub r27=-1,r26 + mov r24=b0 + ;; + mov r20=cr.ipsr + or r28=r27,r28 + and r19=r18,r27 + ;; + st8 [r17]=r19 + and r20=r20,r28 + /* Comment it out due to short of fp lazy alorgithm support + adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 + ;; + ld8 r27=[r27] + ;; + tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT + ;; + (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 + */ + ;; + mov cr.ipsr=r20 + tbit.nz p6,p0=r23,0 + ;; + tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT + (p6) br.dptk kvm_resume_to_guest + ;; + add r26=VMM_VCPU_META_RR0_OFFSET,r21 + add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 + dep r23=-1,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + br.many kvm_resume_to_guest +END(kvm_asm_rsm) + + +//ssm +GLOBAL_ENTRY(kvm_asm_ssm) +#ifndef ACCE_SSM + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,6,21 + extr.u r27=r25,31,2 + ;; + ld8 r16=[r16] + extr.u r28=r25,36,1 + dep r26=r27,r26,21,2 + ;; //r26 is imm24 + add r27=VPD_VPSR_START_OFFSET,r16 + dep r26=r28,r26,23,1 + ;; //r19 vpsr + ld8 r29=[r27] + mov r24=b0 + ;; + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + mov r20=cr.ipsr + or r19=r29,r26 + ;; + ld4 r23=[r22] + st8 [r27]=r19 + or r20=r20,r26 + ;; + mov cr.ipsr=r20 + movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT + ;; + and r19=r28,r19 + tbit.z p6,p0=r23,0 + ;; + cmp.ne.or p6,p0=r28,r19 + (p6) br.dptk kvm_asm_ssm_1 + ;; + add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 + dep r23=0,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + ;; +kvm_asm_ssm_1: + tbit.nz p6,p0=r29,IA64_PSR_I_BIT + ;; + tbit.z.or p6,p0=r19,IA64_PSR_I_BIT + (p6) br.dptk kvm_resume_to_guest + ;; + add r29=VPD_VTPR_START_OFFSET,r16 + add r30=VPD_VHPI_START_OFFSET,r16 + ;; + ld8 r29=[r29] + ld8 r30=[r30] + ;; + extr.u r17=r29,4,4 + extr.u r18=r29,16,1 + ;; + dep r17=r18,r17,4,1 + ;; + cmp.gt p6,p0=r30,r17 + (p6) br.dpnt.few kvm_asm_dispatch_vexirq + br.many kvm_resume_to_guest +END(kvm_asm_ssm) + + +//mov psr.l=r2 +GLOBAL_ENTRY(kvm_asm_mov_to_psr) +#ifndef ACCE_MOV_TO_PSR + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,13,7 //r2 + ;; + ld8 r16=[r16] + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20 + shladd r26=r26,4,r20 + mov r24=b0 + ;; + add r27=VPD_VPSR_START_OFFSET,r16 + mov b0=r26 + br.many b0 + ;; +kvm_asm_mov_to_psr_back: + ld8 r17=[r27] + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + dep r19=0,r19,32,32 + ;; + ld4 r23=[r22] + dep r18=0,r17,0,32 + ;; + add r30=r18,r19 + movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT + ;; + st8 [r27]=r30 + and r27=r28,r30 + and r29=r28,r17 + ;; + cmp.eq p5,p0=r29,r27 + cmp.eq p6,p7=r28,r27 + (p5) br.many kvm_asm_mov_to_psr_1 + ;; + //virtual to physical + (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21 + (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 + (p7) dep r23=-1,r23,0,1 + ;; + //physical to virtual + (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 + (p6) dep r23=0,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + ;; +kvm_asm_mov_to_psr_1: + mov r20=cr.ipsr + movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT + ;; + or r19=r19,r28 + dep r20=0,r20,0,32 + ;; + add r20=r19,r20 + mov b0=r24 + ;; + /* Comment it out due to short of fp lazy algorithm support + adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 + ;; + ld8 r27=[r27] + ;; + tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT + ;; + (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 + ;; + */ + mov cr.ipsr=r20 + cmp.ne p6,p0=r0,r0 + ;; + tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT + tbit.z.or p6,p0=r30,IA64_PSR_I_BIT + (p6) br.dpnt.few kvm_resume_to_guest + ;; + add r29=VPD_VTPR_START_OFFSET,r16 + add r30=VPD_VHPI_START_OFFSET,r16 + ;; + ld8 r29=[r29] + ld8 r30=[r30] + ;; + extr.u r17=r29,4,4 + extr.u r18=r29,16,1 + ;; + dep r17=r18,r17,4,1 + ;; + cmp.gt p6,p0=r30,r17 + (p6) br.dpnt.few kvm_asm_dispatch_vexirq + br.many kvm_resume_to_guest +END(kvm_asm_mov_to_psr) + + +ENTRY(kvm_asm_dispatch_vexirq) +//increment iip + mov r16=cr.ipsr + ;; + extr.u r17=r16,IA64_PSR_RI_BIT,2 + tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 + ;; + (p6) mov r18=cr.iip + (p6) mov r17=r0 + (p7) add r17=1,r17 + ;; + (p6) add r18=0x10,r18 + dep r16=r17,r16,IA64_PSR_RI_BIT,2 + ;; + (p6) mov cr.iip=r18 + mov cr.ipsr=r16 + mov r30 =1 + br.many kvm_dispatch_vexirq +END(kvm_asm_dispatch_vexirq) + +// thash +// TODO: add support when pta.vf = 1 +GLOBAL_ENTRY(kvm_asm_thash) +#ifndef ACCE_THASH + br.many kvm_virtualization_fault_back +#endif + extr.u r17=r25,20,7 // get r3 from opcode in r25 + extr.u r18=r25,6,7 // get r1 from opcode in r25 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20 + shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17) + adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs + ;; + mov r24=b0 + ;; + ld8 r16=[r16] // get VPD addr + mov b0=r17 + br.many b0 // r19 return value + ;; +kvm_asm_thash_back1: + shr.u r23=r19,61 // get RR number + adds r25=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr + adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta + ;; + shladd r27=r23,3,r25 // get vcpu->arch.vrr[r23]'s addr + ld8 r17=[r16] // get PTA + mov r26=1 + ;; + extr.u r29=r17,2,6 // get pta.size + ld8 r25=[r27] // get vcpu->arch.vrr[r23]'s value + ;; + extr.u r25=r25,2,6 // get rr.ps + shl r22=r26,r29 // 1UL << pta.size + ;; + shr.u r23=r19,r25 // vaddr >> rr.ps + adds r26=3,r29 // pta.size + 3 + shl r27=r17,3 // pta << 3 + ;; + shl r23=r23,3 // (vaddr >> rr.ps) << 3 + shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3) + movl r16=7<<61 + ;; + adds r22=-1,r22 // (1UL << pta.size) - 1 + shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<<pta.size + and r19=r19,r16 // vaddr & VRN_MASK + ;; + and r22=r22,r23 // vhpt_offset + or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size) + adds r26=asm_mov_to_reg-asm_mov_from_reg,r20 + ;; + or r19=r19,r22 // calc pval + shladd r17=r18,4,r26 + adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 + ;; + mov b0=r17 + br.many b0 +END(kvm_asm_thash) + +#define MOV_TO_REG0 \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + nop.b 0x0; \ + ;; \ +}; + + +#define MOV_TO_REG(n) \ +{; \ + mov r##n##=r19; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; + + +#define MOV_FROM_REG(n) \ +{; \ + mov r19=r##n##; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; + + +#define MOV_TO_BANK0_REG(n) \ +ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \ +{; \ + mov r26=r2; \ + mov r2=r19; \ + bsw.1; \ + ;; \ +}; \ +{; \ + mov r##n##=r2; \ + nop.b 0x0; \ + bsw.0; \ + ;; \ +}; \ +{; \ + mov r2=r26; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; \ +END(asm_mov_to_bank0_reg##n##) + + +#define MOV_FROM_BANK0_REG(n) \ +ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \ +{; \ + mov r26=r2; \ + nop.b 0x0; \ + bsw.1; \ + ;; \ +}; \ +{; \ + mov r2=r##n##; \ + nop.b 0x0; \ + bsw.0; \ + ;; \ +}; \ +{; \ + mov r19=r2; \ + mov r2=r26; \ + mov b0=r30; \ +}; \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many b0; \ + ;; \ +}; \ +END(asm_mov_from_bank0_reg##n##) + + +#define JMP_TO_MOV_TO_BANK0_REG(n) \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many asm_mov_to_bank0_reg##n##; \ + ;; \ +} + + +#define JMP_TO_MOV_FROM_BANK0_REG(n) \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many asm_mov_from_bank0_reg##n##; \ + ;; \ +} + + +MOV_FROM_BANK0_REG(16) +MOV_FROM_BANK0_REG(17) +MOV_FROM_BANK0_REG(18) +MOV_FROM_BANK0_REG(19) +MOV_FROM_BANK0_REG(20) +MOV_FROM_BANK0_REG(21) +MOV_FROM_BANK0_REG(22) +MOV_FROM_BANK0_REG(23) +MOV_FROM_BANK0_REG(24) +MOV_FROM_BANK0_REG(25) +MOV_FROM_BANK0_REG(26) +MOV_FROM_BANK0_REG(27) +MOV_FROM_BANK0_REG(28) +MOV_FROM_BANK0_REG(29) +MOV_FROM_BANK0_REG(30) +MOV_FROM_BANK0_REG(31) + + +// mov from reg table +ENTRY(asm_mov_from_reg) + MOV_FROM_REG(0) + MOV_FROM_REG(1) + MOV_FROM_REG(2) + MOV_FROM_REG(3) + MOV_FROM_REG(4) + MOV_FROM_REG(5) + MOV_FROM_REG(6) + MOV_FROM_REG(7) + MOV_FROM_REG(8) + MOV_FROM_REG(9) + MOV_FROM_REG(10) + MOV_FROM_REG(11) + MOV_FROM_REG(12) + MOV_FROM_REG(13) + MOV_FROM_REG(14) + MOV_FROM_REG(15) + JMP_TO_MOV_FROM_BANK0_REG(16) + JMP_TO_MOV_FROM_BANK0_REG(17) + JMP_TO_MOV_FROM_BANK0_REG(18) + JMP_TO_MOV_FROM_BANK0_REG(19) + JMP_TO_MOV_FROM_BANK0_REG(20) + JMP_TO_MOV_FROM_BANK0_REG(21) + JMP_TO_MOV_FROM_BANK0_REG(22) + JMP_TO_MOV_FROM_BANK0_REG(23) + JMP_TO_MOV_FROM_BANK0_REG(24) + JMP_TO_MOV_FROM_BANK0_REG(25) + JMP_TO_MOV_FROM_BANK0_REG(26) + JMP_TO_MOV_FROM_BANK0_REG(27) + JMP_TO_MOV_FROM_BANK0_REG(28) + JMP_TO_MOV_FROM_BANK0_REG(29) + JMP_TO_MOV_FROM_BANK0_REG(30) + JMP_TO_MOV_FROM_BANK0_REG(31) + MOV_FROM_REG(32) + MOV_FROM_REG(33) + MOV_FROM_REG(34) + MOV_FROM_REG(35) + MOV_FROM_REG(36) + MOV_FROM_REG(37) + MOV_FROM_REG(38) + MOV_FROM_REG(39) + MOV_FROM_REG(40) + MOV_FROM_REG(41) + MOV_FROM_REG(42) + MOV_FROM_REG(43) + MOV_FROM_REG(44) + MOV_FROM_REG(45) + MOV_FROM_REG(46) + MOV_FROM_REG(47) + MOV_FROM_REG(48) + MOV_FROM_REG(49) + MOV_FROM_REG(50) + MOV_FROM_REG(51) + MOV_FROM_REG(52) + MOV_FROM_REG(53) + MOV_FROM_REG(54) + MOV_FROM_REG(55) + MOV_FROM_REG(56) + MOV_FROM_REG(57) + MOV_FROM_REG(58) + MOV_FROM_REG(59) + MOV_FROM_REG(60) + MOV_FROM_REG(61) + MOV_FROM_REG(62) + MOV_FROM_REG(63) + MOV_FROM_REG(64) + MOV_FROM_REG(65) + MOV_FROM_REG(66) + MOV_FROM_REG(67) + MOV_FROM_REG(68) + MOV_FROM_REG(69) + MOV_FROM_REG(70) + MOV_FROM_REG(71) + MOV_FROM_REG(72) + MOV_FROM_REG(73) + MOV_FROM_REG(74) + MOV_FROM_REG(75) + MOV_FROM_REG(76) + MOV_FROM_REG(77) + MOV_FROM_REG(78) + MOV_FROM_REG(79) + MOV_FROM_REG(80) + MOV_FROM_REG(81) + MOV_FROM_REG(82) + MOV_FROM_REG(83) + MOV_FROM_REG(84) + MOV_FROM_REG(85) + MOV_FROM_REG(86) + MOV_FROM_REG(87) + MOV_FROM_REG(88) + MOV_FROM_REG(89) + MOV_FROM_REG(90) + MOV_FROM_REG(91) + MOV_FROM_REG(92) + MOV_FROM_REG(93) + MOV_FROM_REG(94) + MOV_FROM_REG(95) + MOV_FROM_REG(96) + MOV_FROM_REG(97) + MOV_FROM_REG(98) + MOV_FROM_REG(99) + MOV_FROM_REG(100) + MOV_FROM_REG(101) + MOV_FROM_REG(102) + MOV_FROM_REG(103) + MOV_FROM_REG(104) + MOV_FROM_REG(105) + MOV_FROM_REG(106) + MOV_FROM_REG(107) + MOV_FROM_REG(108) + MOV_FROM_REG(109) + MOV_FROM_REG(110) + MOV_FROM_REG(111) + MOV_FROM_REG(112) + MOV_FROM_REG(113) + MOV_FROM_REG(114) + MOV_FROM_REG(115) + MOV_FROM_REG(116) + MOV_FROM_REG(117) + MOV_FROM_REG(118) + MOV_FROM_REG(119) + MOV_FROM_REG(120) + MOV_FROM_REG(121) + MOV_FROM_REG(122) + MOV_FROM_REG(123) + MOV_FROM_REG(124) + MOV_FROM_REG(125) + MOV_FROM_REG(126) + MOV_FROM_REG(127) +END(asm_mov_from_reg) + + +/* must be in bank 0 + * parameter: + * r31: pr + * r24: b0 + */ +ENTRY(kvm_resume_to_guest) + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + ld8 r1 =[r16] + adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21 + ;; + mov r16=cr.ipsr + ;; + ld8 r20 = [r20] + adds r19=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r25=[r19] + extr.u r17=r16,IA64_PSR_RI_BIT,2 + tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 + ;; + (p6) mov r18=cr.iip + (p6) mov r17=r0 + ;; + (p6) add r18=0x10,r18 + (p7) add r17=1,r17 + ;; + (p6) mov cr.iip=r18 + dep r16=r17,r16,IA64_PSR_RI_BIT,2 + ;; + mov cr.ipsr=r16 + adds r19= VPD_VPSR_START_OFFSET,r25 + add r28=PAL_VPS_RESUME_NORMAL,r20 + add r29=PAL_VPS_RESUME_HANDLER,r20 + ;; + ld8 r19=[r19] + mov b0=r29 + cmp.ne p6,p7 = r0,r0 + ;; + tbit.z p6,p7 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + ;; + (p6) ld8 r26=[r25] + (p7) mov b0=r28 + mov pr=r31,-2 + br.sptk.many b0 // call pal service + ;; +END(kvm_resume_to_guest) + + +MOV_TO_BANK0_REG(16) +MOV_TO_BANK0_REG(17) +MOV_TO_BANK0_REG(18) +MOV_TO_BANK0_REG(19) +MOV_TO_BANK0_REG(20) +MOV_TO_BANK0_REG(21) +MOV_TO_BANK0_REG(22) +MOV_TO_BANK0_REG(23) +MOV_TO_BANK0_REG(24) +MOV_TO_BANK0_REG(25) +MOV_TO_BANK0_REG(26) +MOV_TO_BANK0_REG(27) +MOV_TO_BANK0_REG(28) +MOV_TO_BANK0_REG(29) +MOV_TO_BANK0_REG(30) +MOV_TO_BANK0_REG(31) + + +// mov to reg table +ENTRY(asm_mov_to_reg) + MOV_TO_REG0 + MOV_TO_REG(1) + MOV_TO_REG(2) + MOV_TO_REG(3) + MOV_TO_REG(4) + MOV_TO_REG(5) + MOV_TO_REG(6) + MOV_TO_REG(7) + MOV_TO_REG(8) + MOV_TO_REG(9) + MOV_TO_REG(10) + MOV_TO_REG(11) + MOV_TO_REG(12) + MOV_TO_REG(13) + MOV_TO_REG(14) + MOV_TO_REG(15) + JMP_TO_MOV_TO_BANK0_REG(16) + JMP_TO_MOV_TO_BANK0_REG(17) + JMP_TO_MOV_TO_BANK0_REG(18) + JMP_TO_MOV_TO_BANK0_REG(19) + JMP_TO_MOV_TO_BANK0_REG(20) + JMP_TO_MOV_TO_BANK0_REG(21) + JMP_TO_MOV_TO_BANK0_REG(22) + JMP_TO_MOV_TO_BANK0_REG(23) + JMP_TO_MOV_TO_BANK0_REG(24) + JMP_TO_MOV_TO_BANK0_REG(25) + JMP_TO_MOV_TO_BANK0_REG(26) + JMP_TO_MOV_TO_BANK0_REG(27) + JMP_TO_MOV_TO_BANK0_REG(28) + JMP_TO_MOV_TO_BANK0_REG(29) + JMP_TO_MOV_TO_BANK0_REG(30) + JMP_TO_MOV_TO_BANK0_REG(31) + MOV_TO_REG(32) + MOV_TO_REG(33) + MOV_TO_REG(34) + MOV_TO_REG(35) + MOV_TO_REG(36) + MOV_TO_REG(37) + MOV_TO_REG(38) + MOV_TO_REG(39) + MOV_TO_REG(40) + MOV_TO_REG(41) + MOV_TO_REG(42) + MOV_TO_REG(43) + MOV_TO_REG(44) + MOV_TO_REG(45) + MOV_TO_REG(46) + MOV_TO_REG(47) + MOV_TO_REG(48) + MOV_TO_REG(49) + MOV_TO_REG(50) + MOV_TO_REG(51) + MOV_TO_REG(52) + MOV_TO_REG(53) + MOV_TO_REG(54) + MOV_TO_REG(55) + MOV_TO_REG(56) + MOV_TO_REG(57) + MOV_TO_REG(58) + MOV_TO_REG(59) + MOV_TO_REG(60) + MOV_TO_REG(61) + MOV_TO_REG(62) + MOV_TO_REG(63) + MOV_TO_REG(64) + MOV_TO_REG(65) + MOV_TO_REG(66) + MOV_TO_REG(67) + MOV_TO_REG(68) + MOV_TO_REG(69) + MOV_TO_REG(70) + MOV_TO_REG(71) + MOV_TO_REG(72) + MOV_TO_REG(73) + MOV_TO_REG(74) + MOV_TO_REG(75) + MOV_TO_REG(76) + MOV_TO_REG(77) + MOV_TO_REG(78) + MOV_TO_REG(79) + MOV_TO_REG(80) + MOV_TO_REG(81) + MOV_TO_REG(82) + MOV_TO_REG(83) + MOV_TO_REG(84) + MOV_TO_REG(85) + MOV_TO_REG(86) + MOV_TO_REG(87) + MOV_TO_REG(88) + MOV_TO_REG(89) + MOV_TO_REG(90) + MOV_TO_REG(91) + MOV_TO_REG(92) + MOV_TO_REG(93) + MOV_TO_REG(94) + MOV_TO_REG(95) + MOV_TO_REG(96) + MOV_TO_REG(97) + MOV_TO_REG(98) + MOV_TO_REG(99) + MOV_TO_REG(100) + MOV_TO_REG(101) + MOV_TO_REG(102) + MOV_TO_REG(103) + MOV_TO_REG(104) + MOV_TO_REG(105) + MOV_TO_REG(106) + MOV_TO_REG(107) + MOV_TO_REG(108) + MOV_TO_REG(109) + MOV_TO_REG(110) + MOV_TO_REG(111) + MOV_TO_REG(112) + MOV_TO_REG(113) + MOV_TO_REG(114) + MOV_TO_REG(115) + MOV_TO_REG(116) + MOV_TO_REG(117) + MOV_TO_REG(118) + MOV_TO_REG(119) + MOV_TO_REG(120) + MOV_TO_REG(121) + MOV_TO_REG(122) + MOV_TO_REG(123) + MOV_TO_REG(124) + MOV_TO_REG(125) + MOV_TO_REG(126) + MOV_TO_REG(127) +END(asm_mov_to_reg) diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c new file mode 100644 index 00000000000..5a33f7ed29a --- /dev/null +++ b/arch/ia64/kvm/process.c @@ -0,0 +1,970 @@ +/* + * process.c: handle interruption inject for guests. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Shaofan Li (Susue Li) <susie.li@intel.com> + * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com> + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + */ +#include "vcpu.h" + +#include <asm/pal.h> +#include <asm/sal.h> +#include <asm/fpswa.h> +#include <asm/kregs.h> +#include <asm/tlb.h> + +fpswa_interface_t *vmm_fpswa_interface; + +#define IA64_VHPT_TRANS_VECTOR 0x0000 +#define IA64_INST_TLB_VECTOR 0x0400 +#define IA64_DATA_TLB_VECTOR 0x0800 +#define IA64_ALT_INST_TLB_VECTOR 0x0c00 +#define IA64_ALT_DATA_TLB_VECTOR 0x1000 +#define IA64_DATA_NESTED_TLB_VECTOR 0x1400 +#define IA64_INST_KEY_MISS_VECTOR 0x1800 +#define IA64_DATA_KEY_MISS_VECTOR 0x1c00 +#define IA64_DIRTY_BIT_VECTOR 0x2000 +#define IA64_INST_ACCESS_BIT_VECTOR 0x2400 +#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800 +#define IA64_BREAK_VECTOR 0x2c00 +#define IA64_EXTINT_VECTOR 0x3000 +#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000 +#define IA64_KEY_PERMISSION_VECTOR 0x5100 +#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200 +#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300 +#define IA64_GENEX_VECTOR 0x5400 +#define IA64_DISABLED_FPREG_VECTOR 0x5500 +#define IA64_NAT_CONSUMPTION_VECTOR 0x5600 +#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */ +#define IA64_DEBUG_VECTOR 0x5900 +#define IA64_UNALIGNED_REF_VECTOR 0x5a00 +#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00 +#define IA64_FP_FAULT_VECTOR 0x5c00 +#define IA64_FP_TRAP_VECTOR 0x5d00 +#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00 +#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00 +#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000 + +/* SDM vol2 5.5 - IVA based interruption handling */ +#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\ + IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \ + IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT) + +#define DOMN_PAL_REQUEST 0x110000 +#define DOMN_SAL_REQUEST 0x110001 + +static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800, + 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, + 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400, + 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00, + 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, + 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, + 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, + 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00 +}; + +static void collect_interruption(struct kvm_vcpu *vcpu) +{ + u64 ipsr; + u64 vdcr; + u64 vifs; + unsigned long vpsr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + vpsr = vcpu_get_psr(vcpu); + vcpu_bsw0(vcpu); + if (vpsr & IA64_PSR_IC) { + + /* Sync mpsr id/da/dd/ss/ed bits to vipsr + * since after guest do rfi, we still want these bits on in + * mpsr + */ + + ipsr = regs->cr_ipsr; + vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA + | IA64_PSR_DD | IA64_PSR_SS + | IA64_PSR_ED)); + vcpu_set_ipsr(vcpu, vpsr); + + /* Currently, for trap, we do not advance IIP to next + * instruction. That's because we assume caller already + * set up IIP correctly + */ + + vcpu_set_iip(vcpu , regs->cr_iip); + + /* set vifs.v to zero */ + vifs = VCPU(vcpu, ifs); + vifs &= ~IA64_IFS_V; + vcpu_set_ifs(vcpu, vifs); + + vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa)); + } + + vdcr = VCPU(vcpu, dcr); + + /* Set guest psr + * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged + * be: set to the value of dcr.be + * pp: set to the value of dcr.pp + */ + vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION; + vpsr |= (vdcr & IA64_DCR_BE); + + /* VDCR pp bit position is different from VPSR pp bit */ + if (vdcr & IA64_DCR_PP) { + vpsr |= IA64_PSR_PP; + } else { + vpsr &= ~IA64_PSR_PP;; + } + + vcpu_set_psr(vcpu, vpsr); + +} + +void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec) +{ + u64 viva; + struct kvm_pt_regs *regs; + union ia64_isr pt_isr; + + regs = vcpu_regs(vcpu); + + /* clear cr.isr.ir (incomplete register frame)*/ + pt_isr.val = VMX(vcpu, cr_isr); + pt_isr.ir = 0; + VMX(vcpu, cr_isr) = pt_isr.val; + + collect_interruption(vcpu); + + viva = vcpu_get_iva(vcpu); + regs->cr_iip = viva + vec; +} + +static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa) +{ + union ia64_rr rr, rr1; + + rr.val = vcpu_get_rr(vcpu, ifa); + rr1.val = 0; + rr1.ps = rr.ps; + rr1.rid = rr.rid; + return (rr1.val); +} + + +/* + * Set vIFA & vITIR & vIHA, when vPSR.ic =1 + * Parameter: + * set_ifa: if true, set vIFA + * set_itir: if true, set vITIR + * set_iha: if true, set vIHA + */ +void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr, + int set_ifa, int set_itir, int set_iha) +{ + long vpsr; + u64 value; + + vpsr = VCPU(vcpu, vpsr); + /* Vol2, Table 8-1 */ + if (vpsr & IA64_PSR_IC) { + if (set_ifa) + vcpu_set_ifa(vcpu, vadr); + if (set_itir) { + value = vcpu_get_itir_on_fault(vcpu, vadr); + vcpu_set_itir(vcpu, value); + } + + if (set_iha) { + value = vcpu_thash(vcpu, vadr); + vcpu_set_iha(vcpu, value); + } + } +} + +/* + * Data TLB Fault + * @ Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR); +} + +/* + * Instruction TLB Fault + * @ Instruction TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR); +} + + + +/* + * Data Nested TLB Fault + * @ Data Nested TLB Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void nested_dtlb(struct kvm_vcpu *vcpu) +{ + inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR); +} + +/* + * Alternate Data TLB Fault + * @ Alternate Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr) +{ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR); +} + + +/* + * Data TLB Fault + * @ Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr) +{ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR); +} + +/* Deal with: + * VHPT Translation Vector + */ +static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA*/ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR); + + +} + +/* + * VHPT Instruction Fault + * @ VHPT Translation vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + _vhpt_fault(vcpu, vadr); +} + + +/* + * VHPT Data Fault + * @ VHPT Translation vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + _vhpt_fault(vcpu, vadr); +} + + + +/* + * Deal with: + * General Exception vector + */ +void _general_exception(struct kvm_vcpu *vcpu) +{ + inject_guest_interruption(vcpu, IA64_GENEX_VECTOR); +} + + +/* + * Illegal Operation Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void illegal_op(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Illegal Dependency Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void illegal_dep(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Reserved Register/Field Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void rsv_reg_field(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} +/* + * Privileged Operation Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ + +void privilege_op(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Unimplement Data Address Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void unimpl_daddr(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Privileged Register Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void privilege_reg(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* Deal with + * Nat consumption vector + * Parameter: + * vaddr: Optional, if t == REGISTER + */ +static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr, + enum tlb_miss_type t) +{ + /* If vPSR.ic && t == DATA/INST, IFA */ + if (t == DATA || t == INSTRUCTION) { + /* IFA */ + set_ifa_itir_iha(vcpu, vadr, 1, 0, 0); + } + + inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR); +} + +/* + * Instruction Nat Page Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) +{ + _nat_consumption_fault(vcpu, vadr, INSTRUCTION); +} + +/* + * Register Nat Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void rnat_consumption(struct kvm_vcpu *vcpu) +{ + _nat_consumption_fault(vcpu, 0, REGISTER); +} + +/* + * Data Nat Page Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) +{ + _nat_consumption_fault(vcpu, vadr, DATA); +} + +/* Deal with + * Page not present vector + */ +static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); +} + + +void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + +void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + +/* Deal with + * Data access rights vector + */ +void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR); +} + +fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr, + unsigned long *fpsr, unsigned long *isr, unsigned long *pr, + unsigned long *ifs, struct kvm_pt_regs *regs) +{ + fp_state_t fp_state; + fpswa_ret_t ret; + struct kvm_vcpu *vcpu = current_vcpu; + + uint64_t old_rr7 = ia64_get_rr(7UL<<61); + + if (!vmm_fpswa_interface) + return (fpswa_ret_t) {-1, 0, 0, 0}; + + /* + * Just let fpswa driver to use hardware fp registers. + * No fp register is valid in memory. + */ + memset(&fp_state, 0, sizeof(fp_state_t)); + + /* + * unsigned long (*EFI_FPSWA) ( + * unsigned long trap_type, + * void *Bundle, + * unsigned long *pipsr, + * unsigned long *pfsr, + * unsigned long *pisr, + * unsigned long *ppreds, + * unsigned long *pifs, + * void *fp_state); + */ + /*Call host fpswa interface directly to virtualize + *guest fpswa request! + */ + ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]); + ia64_srlz_d(); + + ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle, + ipsr, fpsr, isr, pr, ifs, &fp_state); + ia64_set_rr(7UL << 61, old_rr7); + ia64_srlz_d(); + return ret; +} + +/* + * Handle floating-point assist faults and traps for domain. + */ +unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs, + unsigned long isr) +{ + struct kvm_vcpu *v = current_vcpu; + IA64_BUNDLE bundle; + unsigned long fault_ip; + fpswa_ret_t ret; + + fault_ip = regs->cr_iip; + /* + * When the FP trap occurs, the trapping instruction is completed. + * If ipsr.ri == 0, there is the trapping instruction in previous + * bundle. + */ + if (!fp_fault && (ia64_psr(regs)->ri == 0)) + fault_ip -= 16; + + if (fetch_code(v, fault_ip, &bundle)) + return -EAGAIN; + + if (!bundle.i64[0] && !bundle.i64[1]) + return -EACCES; + + ret = vmm_fp_emulate(fp_fault, &bundle, ®s->cr_ipsr, ®s->ar_fpsr, + &isr, ®s->pr, ®s->cr_ifs, regs); + return ret.status; +} + +void reflect_interruption(u64 ifa, u64 isr, u64 iim, + u64 vec, struct kvm_pt_regs *regs) +{ + u64 vector; + int status ; + struct kvm_vcpu *vcpu = current_vcpu; + u64 vpsr = VCPU(vcpu, vpsr); + + vector = vec2off[vec]; + + if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { + panic_vm(vcpu); + return; + } + + switch (vec) { + case 32: /*IA64_FP_FAULT_VECTOR*/ + status = vmm_handle_fpu_swa(1, regs, isr); + if (!status) { + vcpu_increment_iip(vcpu); + return; + } else if (-EAGAIN == status) + return; + break; + case 33: /*IA64_FP_TRAP_VECTOR*/ + status = vmm_handle_fpu_swa(0, regs, isr); + if (!status) + return ; + else if (-EAGAIN == status) { + vcpu_decrement_iip(vcpu); + return ; + } + break; + } + + VCPU(vcpu, isr) = isr; + VCPU(vcpu, iipa) = regs->cr_iip; + if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) + VCPU(vcpu, iim) = iim; + else + set_ifa_itir_iha(vcpu, ifa, 1, 1, 1); + + inject_guest_interruption(vcpu, vector); +} + +static void set_pal_call_data(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + /*FIXME:For static and stacked convention, firmware + * has put the parameters in gr28-gr31 before + * break to vmm !!*/ + + p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28); + p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29); + p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); + p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31); + p->exit_reason = EXIT_REASON_PAL_CALL; +} + +static void set_pal_call_result(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + if (p->exit_reason == EXIT_REASON_PAL_CALL) { + vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0); + vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0); + vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); + vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); + } else + panic_vm(vcpu); +} + +static void set_sal_call_data(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32); + p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33); + p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34); + p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35); + p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36); + p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37); + p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38); + p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39); + p->exit_reason = EXIT_REASON_SAL_CALL; +} + +static void set_sal_call_result(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0); + vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0); + vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); + vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); + } else + panic_vm(vcpu); +} + +void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, + unsigned long isr, unsigned long iim) +{ + struct kvm_vcpu *v = current_vcpu; + + if (ia64_psr(regs)->cpl == 0) { + /* Allow hypercalls only when cpl = 0. */ + if (iim == DOMN_PAL_REQUEST) { + set_pal_call_data(v); + vmm_transition(v); + set_pal_call_result(v); + vcpu_increment_iip(v); + return; + } else if (iim == DOMN_SAL_REQUEST) { + set_sal_call_data(v); + vmm_transition(v); + set_sal_call_result(v); + vcpu_increment_iip(v); + return; + } + } + reflect_interruption(ifa, isr, iim, 11, regs); +} + +void check_pending_irq(struct kvm_vcpu *vcpu) +{ + int mask, h_pending, h_inservice; + u64 isr; + unsigned long vpsr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + h_pending = highest_pending_irq(vcpu); + if (h_pending == NULL_VECTOR) { + update_vhpi(vcpu, NULL_VECTOR); + return; + } + h_inservice = highest_inservice_irq(vcpu); + + vpsr = VCPU(vcpu, vpsr); + mask = irq_masked(vcpu, h_pending, h_inservice); + if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) { + isr = vpsr & IA64_PSR_RI; + update_vhpi(vcpu, h_pending); + reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ + } else if (mask == IRQ_MASKED_BY_INSVC) { + if (VCPU(vcpu, vhpi)) + update_vhpi(vcpu, NULL_VECTOR); + } else { + /* masked by vpsr.i or vtpr.*/ + update_vhpi(vcpu, h_pending); + } +} + +static void generate_exirq(struct kvm_vcpu *vcpu) +{ + unsigned vpsr; + uint64_t isr; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + vpsr = VCPU(vcpu, vpsr); + isr = vpsr & IA64_PSR_RI; + if (!(vpsr & IA64_PSR_IC)) + panic_vm(vcpu); + reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ +} + +void vhpi_detection(struct kvm_vcpu *vcpu) +{ + uint64_t threshold, vhpi; + union ia64_tpr vtpr; + struct ia64_psr vpsr; + + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + vtpr.val = VCPU(vcpu, tpr); + + threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic; + vhpi = VCPU(vcpu, vhpi); + if (vhpi > threshold) { + /* interrupt actived*/ + generate_exirq(vcpu); + } +} + + +void leave_hypervisor_tail(void) +{ + struct kvm_vcpu *v = current_vcpu; + + if (VMX(v, timer_check)) { + VMX(v, timer_check) = 0; + if (VMX(v, itc_check)) { + if (vcpu_get_itc(v) > VCPU(v, itm)) { + if (!(VCPU(v, itv) & (1 << 16))) { + vcpu_pend_interrupt(v, VCPU(v, itv) + & 0xff); + VMX(v, itc_check) = 0; + } else { + v->arch.timer_pending = 1; + } + VMX(v, last_itc) = VCPU(v, itm) + 1; + } + } + } + + rmb(); + if (v->arch.irq_new_pending) { + v->arch.irq_new_pending = 0; + VMX(v, irq_check) = 0; + check_pending_irq(v); + return; + } + if (VMX(v, irq_check)) { + VMX(v, irq_check) = 0; + vhpi_detection(v); + } +} + + +static inline void handle_lds(struct kvm_pt_regs *regs) +{ + regs->cr_ipsr |= IA64_PSR_ED; +} + +void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type) +{ + unsigned long pte; + union ia64_rr rr; + + rr.val = ia64_get_rr(vadr); + pte = vadr & _PAGE_PPN_MASK; + pte = pte | PHY_PAGE_WB; + thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type); + return; +} + +void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs) +{ + unsigned long vpsr; + int type; + + u64 vhpt_adr, gppa, pteval, rr, itir; + union ia64_isr misr; + union ia64_pta vpta; + struct thash_data *data; + struct kvm_vcpu *v = current_vcpu; + + vpsr = VCPU(v, vpsr); + misr.val = VMX(v, cr_isr); + + type = vec; + + if (is_physical_mode(v) && (!(vadr << 1 >> 62))) { + if (vec == 2) { + if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) { + emulate_io_inst(v, ((vadr << 1) >> 1), 4); + return; + } + } + physical_tlb_miss(v, vadr, type); + return; + } + data = vtlb_lookup(v, vadr, type); + if (data != 0) { + if (type == D_TLB) { + gppa = (vadr & ((1UL << data->ps) - 1)) + + (data->ppn >> (data->ps - 12) << data->ps); + if (__gpfn_is_io(gppa >> PAGE_SHIFT)) { + if (data->pl >= ((regs->cr_ipsr >> + IA64_PSR_CPL0_BIT) & 3)) + emulate_io_inst(v, gppa, data->ma); + else { + vcpu_set_isr(v, misr.val); + data_access_rights(v, vadr); + } + return ; + } + } + thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type); + + } else if (type == D_TLB) { + if (misr.sp) { + handle_lds(regs); + return; + } + + rr = vcpu_get_rr(v, vadr); + itir = rr & (RR_RID_MASK | RR_PS_MASK); + + if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) { + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + alt_dtlb(v, vadr); + } else { + nested_dtlb(v); + } + return ; + } + + vpta.val = vcpu_get_pta(v); + /* avoid recursively walking (short format) VHPT */ + + vhpt_adr = vcpu_thash(v, vadr); + if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { + /* VHPT successfully read. */ + if (!(pteval & _PAGE_P)) { + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dtlb_fault(v, vadr); + } else { + nested_dtlb(v); + } + } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) { + thash_purge_and_insert(v, pteval, itir, + vadr, D_TLB); + } else if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dtlb_fault(v, vadr); + } else { + nested_dtlb(v); + } + } else { + /* Can't read VHPT. */ + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dvhpt_fault(v, vadr); + } else { + nested_dtlb(v); + } + } + } else if (type == I_TLB) { + if (!(vpsr & IA64_PSR_IC)) + misr.ni = 1; + if (!vhpt_enabled(v, vadr, INST_REF)) { + vcpu_set_isr(v, misr.val); + alt_itlb(v, vadr); + return; + } + + vpta.val = vcpu_get_pta(v); + + vhpt_adr = vcpu_thash(v, vadr); + if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { + /* VHPT successfully read. */ + if (pteval & _PAGE_P) { + if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) { + vcpu_set_isr(v, misr.val); + itlb_fault(v, vadr); + return ; + } + rr = vcpu_get_rr(v, vadr); + itir = rr & (RR_RID_MASK | RR_PS_MASK); + thash_purge_and_insert(v, pteval, itir, + vadr, I_TLB); + } else { + vcpu_set_isr(v, misr.val); + inst_page_not_present(v, vadr); + } + } else { + vcpu_set_isr(v, misr.val); + ivhpt_fault(v, vadr); + } + } +} + +void kvm_vexirq(struct kvm_vcpu *vcpu) +{ + u64 vpsr, isr; + struct kvm_pt_regs *regs; + + regs = vcpu_regs(vcpu); + vpsr = VCPU(vcpu, vpsr); + isr = vpsr & IA64_PSR_RI; + reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/ +} + +void kvm_ia64_handle_irq(struct kvm_vcpu *v) +{ + struct exit_ctl_data *p = &v->arch.exit_data; + long psr; + + local_irq_save(psr); + p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; + vmm_transition(v); + local_irq_restore(psr); + + VMX(v, timer_check) = 1; + +} + +static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos) +{ + u64 oldrid, moldrid, oldpsbits, vaddr; + struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos]; + vaddr = p->vaddr; + + oldrid = VMX(v, vrr[0]); + VMX(v, vrr[0]) = p->rr; + oldpsbits = VMX(v, psbits[0]); + VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]); + moldrid = ia64_get_rr(0x0); + ia64_set_rr(0x0, vrrtomrr(p->rr)); + ia64_srlz_d(); + + vaddr = PAGEALIGN(vaddr, p->ps); + thash_purge_entries_remote(v, vaddr, p->ps); + + VMX(v, vrr[0]) = oldrid; + VMX(v, psbits[0]) = oldpsbits; + ia64_set_rr(0x0, moldrid); + ia64_dv_serialize_data(); +} + +static void vcpu_do_resume(struct kvm_vcpu *vcpu) +{ + /*Re-init VHPT and VTLB once from resume*/ + vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES; + thash_init(&vcpu->arch.vhpt, VHPT_SHIFT); + vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES; + thash_init(&vcpu->arch.vtlb, VTLB_SHIFT); + + ia64_set_pta(vcpu->arch.vhpt.pta.val); +} + +static void kvm_do_resume_op(struct kvm_vcpu *vcpu) +{ + if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { + vcpu_do_resume(vcpu); + return; + } + + if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) { + thash_purge_all(vcpu); + return; + } + + if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) { + while (vcpu->arch.ptc_g_count > 0) + ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count); + } +} + +void vmm_transition(struct kvm_vcpu *vcpu) +{ + ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd, + 0, 0, 0, 0, 0, 0); + vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host); + ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd, + 0, 0, 0, 0, 0, 0); + kvm_do_resume_op(vcpu); +} diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S new file mode 100644 index 00000000000..30897d44d61 --- /dev/null +++ b/arch/ia64/kvm/trampoline.S @@ -0,0 +1,1038 @@ +/* Save all processor states + * + * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com> + * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com> + */ + +#include <asm/asmmacro.h> +#include "asm-offsets.h" + + +#define CTX(name) VMM_CTX_##name##_OFFSET + + /* + * r32: context_t base address + */ +#define SAVE_BRANCH_REGS \ + add r2 = CTX(B0),r32; \ + add r3 = CTX(B1),r32; \ + mov r16 = b0; \ + mov r17 = b1; \ + ;; \ + st8 [r2]=r16,16; \ + st8 [r3]=r17,16; \ + ;; \ + mov r16 = b2; \ + mov r17 = b3; \ + ;; \ + st8 [r2]=r16,16; \ + st8 [r3]=r17,16; \ + ;; \ + mov r16 = b4; \ + mov r17 = b5; \ + ;; \ + st8 [r2]=r16; \ + st8 [r3]=r17; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_BRANCH_REGS \ + add r2 = CTX(B0),r33; \ + add r3 = CTX(B1),r33; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov b0 = r16; \ + mov b1 = r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov b2 = r16; \ + mov b3 = r17; \ + ;; \ + ld8 r16=[r2]; \ + ld8 r17=[r3]; \ + ;; \ + mov b4=r16; \ + mov b5=r17; \ + ;; + + + /* + * r32: context_t base address + * bsw == 1 + * Save all bank1 general registers, r4 ~ r7 + */ +#define SAVE_GENERAL_REGS \ + add r2=CTX(R4),r32; \ + add r3=CTX(R5),r32; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r4,16; \ +.mem.offset 8,0; \ + st8.spill [r3]=r5,16; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r6,48; \ +.mem.offset 8,0; \ + st8.spill [r3]=r7,48; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r12; \ +.mem.offset 8,0; \ + st8.spill [r3]=r13; \ + ;; + + /* + * r33: context_t base address + * bsw == 1 + */ +#define RESTORE_GENERAL_REGS \ + add r2=CTX(R4),r33; \ + add r3=CTX(R5),r33; \ + ;; \ + ld8.fill r4=[r2],16; \ + ld8.fill r5=[r3],16; \ + ;; \ + ld8.fill r6=[r2],48; \ + ld8.fill r7=[r3],48; \ + ;; \ + ld8.fill r12=[r2]; \ + ld8.fill r13 =[r3]; \ + ;; + + + + + /* + * r32: context_t base address + */ +#define SAVE_KERNEL_REGS \ + add r2 = CTX(KR0),r32; \ + add r3 = CTX(KR1),r32; \ + mov r16 = ar.k0; \ + mov r17 = ar.k1; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k2; \ + mov r17 = ar.k3; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k4; \ + mov r17 = ar.k5; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k6; \ + mov r17 = ar.k7; \ + ;; \ + st8 [r2] = r16; \ + st8 [r3] = r17; \ + ;; + + + + /* + * r33: context_t base address + */ +#define RESTORE_KERNEL_REGS \ + add r2 = CTX(KR0),r33; \ + add r3 = CTX(KR1),r33; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k0=r16; \ + mov ar.k1=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k2=r16; \ + mov ar.k3=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k4=r16; \ + mov ar.k5=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k6=r16; \ + mov ar.k7=r17; \ + ;; + + + + /* + * r32: context_t base address + */ +#define SAVE_APP_REGS \ + add r2 = CTX(BSPSTORE),r32; \ + mov r16 = ar.bspstore; \ + ;; \ + st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\ + mov r16 = ar.rnat; \ + ;; \ + st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \ + mov r16 = ar.fcr; \ + ;; \ + st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \ + mov r16 = ar.eflag; \ + ;; \ + st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \ + mov r16 = ar.cflg; \ + ;; \ + st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \ + mov r16 = ar.fsr; \ + ;; \ + st8 [r2] = r16,CTX(FIR)-CTX(FSR); \ + mov r16 = ar.fir; \ + ;; \ + st8 [r2] = r16,CTX(FDR)-CTX(FIR); \ + mov r16 = ar.fdr; \ + ;; \ + st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \ + mov r16 = ar.unat; \ + ;; \ + st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \ + mov r16 = ar.fpsr; \ + ;; \ + st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \ + mov r16 = ar.pfs; \ + ;; \ + st8 [r2] = r16,CTX(LC)-CTX(PFS); \ + mov r16 = ar.lc; \ + ;; \ + st8 [r2] = r16; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_APP_REGS \ + add r2=CTX(BSPSTORE),r33; \ + ;; \ + ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \ + ;; \ + mov ar.bspstore=r16; \ + ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \ + ;; \ + mov ar.rnat=r16; \ + ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \ + ;; \ + mov ar.fcr=r16; \ + ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \ + ;; \ + mov ar.eflag=r16; \ + ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \ + ;; \ + mov ar.cflg=r16; \ + ld8 r16=[r2],CTX(FIR)-CTX(FSR); \ + ;; \ + mov ar.fsr=r16; \ + ld8 r16=[r2],CTX(FDR)-CTX(FIR); \ + ;; \ + mov ar.fir=r16; \ + ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \ + ;; \ + mov ar.fdr=r16; \ + ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \ + ;; \ + mov ar.unat=r16; \ + ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \ + ;; \ + mov ar.fpsr=r16; \ + ld8 r16=[r2],CTX(LC)-CTX(PFS); \ + ;; \ + mov ar.pfs=r16; \ + ld8 r16=[r2]; \ + ;; \ + mov ar.lc=r16; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_CTL_REGS \ + add r2 = CTX(DCR),r32; \ + mov r16 = cr.dcr; \ + ;; \ + st8 [r2] = r16,CTX(IVA)-CTX(DCR); \ + ;; \ + mov r16 = cr.iva; \ + ;; \ + st8 [r2] = r16,CTX(PTA)-CTX(IVA); \ + ;; \ + mov r16 = cr.pta; \ + ;; \ + st8 [r2] = r16 ; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_CTL_REGS \ + add r2 = CTX(DCR),r33; \ + ;; \ + ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \ + ;; \ + mov cr.dcr = r16; \ + dv_serialize_data; \ + ;; \ + ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \ + ;; \ + mov cr.iva = r16; \ + dv_serialize_data; \ + ;; \ + ld8 r16 = [r2]; \ + ;; \ + mov cr.pta = r16; \ + dv_serialize_data; \ + ;; + + + /* + * r32: context_t base address + */ +#define SAVE_REGION_REGS \ + add r2=CTX(RR0),r32; \ + mov r16=rr[r0]; \ + dep.z r18=1,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=2,61,3; \ + ;; \ + st8 [r2]=r17,8; \ + mov r16=rr[r18]; \ + dep.z r18=3,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=4,61,3; \ + ;; \ + st8 [r2]=r17,8; \ + mov r16=rr[r18]; \ + dep.z r18=5,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=7,61,3; \ + ;; \ + st8 [r2]=r17,16; \ + mov r16=rr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + ;; + + /* + * r33:context_t base address + */ +#define RESTORE_REGION_REGS \ + add r2=CTX(RR0),r33;\ + mov r18=r0; \ + ;; \ + ld8 r20=[r2],8; \ + ;; /* rr0 */ \ + ld8 r21=[r2],8; \ + ;; /* rr1 */ \ + ld8 r22=[r2],8; \ + ;; /* rr2 */ \ + ld8 r23=[r2],8; \ + ;; /* rr3 */ \ + ld8 r24=[r2],8; \ + ;; /* rr4 */ \ + ld8 r25=[r2],16; \ + ;; /* rr5 */ \ + ld8 r27=[r2]; \ + ;; /* rr7 */ \ + mov rr[r18]=r20; \ + dep.z r18=1,61,3; \ + ;; /* rr1 */ \ + mov rr[r18]=r21; \ + dep.z r18=2,61,3; \ + ;; /* rr2 */ \ + mov rr[r18]=r22; \ + dep.z r18=3,61,3; \ + ;; /* rr3 */ \ + mov rr[r18]=r23; \ + dep.z r18=4,61,3; \ + ;; /* rr4 */ \ + mov rr[r18]=r24; \ + dep.z r18=5,61,3; \ + ;; /* rr5 */ \ + mov rr[r18]=r25; \ + dep.z r18=7,61,3; \ + ;; /* rr7 */ \ + mov rr[r18]=r27; \ + ;; \ + srlz.i; \ + ;; + + + + /* + * r32: context_t base address + * r36~r39:scratch registers + */ +#define SAVE_DEBUG_REGS \ + add r2=CTX(IBR0),r32; \ + add r3=CTX(DBR0),r32; \ + mov r16=ibr[r0]; \ + mov r17=dbr[r0]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=1,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=2,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=2,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=3,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=4,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=5,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=6,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=7,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + ;; + + +/* + * r33: point to context_t structure + * ar.lc are corrupted. + */ +#define RESTORE_DEBUG_REGS \ + add r2=CTX(IBR0),r33; \ + add r3=CTX(DBR0),r33; \ + mov r16=7; \ + mov r17=r0; \ + ;; \ + mov ar.lc = r16; \ + ;; \ +1: \ + ld8 r18=[r2],8; \ + ld8 r19=[r3],8; \ + ;; \ + mov ibr[r17]=r18; \ + mov dbr[r17]=r19; \ + ;; \ + srlz.i; \ + ;; \ + add r17=1,r17; \ + br.cloop.sptk 1b; \ + ;; + + + /* + * r32: context_t base address + */ +#define SAVE_FPU_LOW \ + add r2=CTX(F2),r32; \ + add r3=CTX(F3),r32; \ + ;; \ + stf.spill.nta [r2]=f2,32; \ + stf.spill.nta [r3]=f3,32; \ + ;; \ + stf.spill.nta [r2]=f4,32; \ + stf.spill.nta [r3]=f5,32; \ + ;; \ + stf.spill.nta [r2]=f6,32; \ + stf.spill.nta [r3]=f7,32; \ + ;; \ + stf.spill.nta [r2]=f8,32; \ + stf.spill.nta [r3]=f9,32; \ + ;; \ + stf.spill.nta [r2]=f10,32; \ + stf.spill.nta [r3]=f11,32; \ + ;; \ + stf.spill.nta [r2]=f12,32; \ + stf.spill.nta [r3]=f13,32; \ + ;; \ + stf.spill.nta [r2]=f14,32; \ + stf.spill.nta [r3]=f15,32; \ + ;; \ + stf.spill.nta [r2]=f16,32; \ + stf.spill.nta [r3]=f17,32; \ + ;; \ + stf.spill.nta [r2]=f18,32; \ + stf.spill.nta [r3]=f19,32; \ + ;; \ + stf.spill.nta [r2]=f20,32; \ + stf.spill.nta [r3]=f21,32; \ + ;; \ + stf.spill.nta [r2]=f22,32; \ + stf.spill.nta [r3]=f23,32; \ + ;; \ + stf.spill.nta [r2]=f24,32; \ + stf.spill.nta [r3]=f25,32; \ + ;; \ + stf.spill.nta [r2]=f26,32; \ + stf.spill.nta [r3]=f27,32; \ + ;; \ + stf.spill.nta [r2]=f28,32; \ + stf.spill.nta [r3]=f29,32; \ + ;; \ + stf.spill.nta [r2]=f30; \ + stf.spill.nta [r3]=f31; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_FPU_HIGH \ + add r2=CTX(F32),r32; \ + add r3=CTX(F33),r32; \ + ;; \ + stf.spill.nta [r2]=f32,32; \ + stf.spill.nta [r3]=f33,32; \ + ;; \ + stf.spill.nta [r2]=f34,32; \ + stf.spill.nta [r3]=f35,32; \ + ;; \ + stf.spill.nta [r2]=f36,32; \ + stf.spill.nta [r3]=f37,32; \ + ;; \ + stf.spill.nta [r2]=f38,32; \ + stf.spill.nta [r3]=f39,32; \ + ;; \ + stf.spill.nta [r2]=f40,32; \ + stf.spill.nta [r3]=f41,32; \ + ;; \ + stf.spill.nta [r2]=f42,32; \ + stf.spill.nta [r3]=f43,32; \ + ;; \ + stf.spill.nta [r2]=f44,32; \ + stf.spill.nta [r3]=f45,32; \ + ;; \ + stf.spill.nta [r2]=f46,32; \ + stf.spill.nta [r3]=f47,32; \ + ;; \ + stf.spill.nta [r2]=f48,32; \ + stf.spill.nta [r3]=f49,32; \ + ;; \ + stf.spill.nta [r2]=f50,32; \ + stf.spill.nta [r3]=f51,32; \ + ;; \ + stf.spill.nta [r2]=f52,32; \ + stf.spill.nta [r3]=f53,32; \ + ;; \ + stf.spill.nta [r2]=f54,32; \ + stf.spill.nta [r3]=f55,32; \ + ;; \ + stf.spill.nta [r2]=f56,32; \ + stf.spill.nta [r3]=f57,32; \ + ;; \ + stf.spill.nta [r2]=f58,32; \ + stf.spill.nta [r3]=f59,32; \ + ;; \ + stf.spill.nta [r2]=f60,32; \ + stf.spill.nta [r3]=f61,32; \ + ;; \ + stf.spill.nta [r2]=f62,32; \ + stf.spill.nta [r3]=f63,32; \ + ;; \ + stf.spill.nta [r2]=f64,32; \ + stf.spill.nta [r3]=f65,32; \ + ;; \ + stf.spill.nta [r2]=f66,32; \ + stf.spill.nta [r3]=f67,32; \ + ;; \ + stf.spill.nta [r2]=f68,32; \ + stf.spill.nta [r3]=f69,32; \ + ;; \ + stf.spill.nta [r2]=f70,32; \ + stf.spill.nta [r3]=f71,32; \ + ;; \ + stf.spill.nta [r2]=f72,32; \ + stf.spill.nta [r3]=f73,32; \ + ;; \ + stf.spill.nta [r2]=f74,32; \ + stf.spill.nta [r3]=f75,32; \ + ;; \ + stf.spill.nta [r2]=f76,32; \ + stf.spill.nta [r3]=f77,32; \ + ;; \ + stf.spill.nta [r2]=f78,32; \ + stf.spill.nta [r3]=f79,32; \ + ;; \ + stf.spill.nta [r2]=f80,32; \ + stf.spill.nta [r3]=f81,32; \ + ;; \ + stf.spill.nta [r2]=f82,32; \ + stf.spill.nta [r3]=f83,32; \ + ;; \ + stf.spill.nta [r2]=f84,32; \ + stf.spill.nta [r3]=f85,32; \ + ;; \ + stf.spill.nta [r2]=f86,32; \ + stf.spill.nta [r3]=f87,32; \ + ;; \ + stf.spill.nta [r2]=f88,32; \ + stf.spill.nta [r3]=f89,32; \ + ;; \ + stf.spill.nta [r2]=f90,32; \ + stf.spill.nta [r3]=f91,32; \ + ;; \ + stf.spill.nta [r2]=f92,32; \ + stf.spill.nta [r3]=f93,32; \ + ;; \ + stf.spill.nta [r2]=f94,32; \ + stf.spill.nta [r3]=f95,32; \ + ;; \ + stf.spill.nta [r2]=f96,32; \ + stf.spill.nta [r3]=f97,32; \ + ;; \ + stf.spill.nta [r2]=f98,32; \ + stf.spill.nta [r3]=f99,32; \ + ;; \ + stf.spill.nta [r2]=f100,32; \ + stf.spill.nta [r3]=f101,32; \ + ;; \ + stf.spill.nta [r2]=f102,32; \ + stf.spill.nta [r3]=f103,32; \ + ;; \ + stf.spill.nta [r2]=f104,32; \ + stf.spill.nta [r3]=f105,32; \ + ;; \ + stf.spill.nta [r2]=f106,32; \ + stf.spill.nta [r3]=f107,32; \ + ;; \ + stf.spill.nta [r2]=f108,32; \ + stf.spill.nta [r3]=f109,32; \ + ;; \ + stf.spill.nta [r2]=f110,32; \ + stf.spill.nta [r3]=f111,32; \ + ;; \ + stf.spill.nta [r2]=f112,32; \ + stf.spill.nta [r3]=f113,32; \ + ;; \ + stf.spill.nta [r2]=f114,32; \ + stf.spill.nta [r3]=f115,32; \ + ;; \ + stf.spill.nta [r2]=f116,32; \ + stf.spill.nta [r3]=f117,32; \ + ;; \ + stf.spill.nta [r2]=f118,32; \ + stf.spill.nta [r3]=f119,32; \ + ;; \ + stf.spill.nta [r2]=f120,32; \ + stf.spill.nta [r3]=f121,32; \ + ;; \ + stf.spill.nta [r2]=f122,32; \ + stf.spill.nta [r3]=f123,32; \ + ;; \ + stf.spill.nta [r2]=f124,32; \ + stf.spill.nta [r3]=f125,32; \ + ;; \ + stf.spill.nta [r2]=f126; \ + stf.spill.nta [r3]=f127; \ + ;; + + /* + * r33: point to context_t structure + */ +#define RESTORE_FPU_LOW \ + add r2 = CTX(F2), r33; \ + add r3 = CTX(F3), r33; \ + ;; \ + ldf.fill.nta f2 = [r2], 32; \ + ldf.fill.nta f3 = [r3], 32; \ + ;; \ + ldf.fill.nta f4 = [r2], 32; \ + ldf.fill.nta f5 = [r3], 32; \ + ;; \ + ldf.fill.nta f6 = [r2], 32; \ + ldf.fill.nta f7 = [r3], 32; \ + ;; \ + ldf.fill.nta f8 = [r2], 32; \ + ldf.fill.nta f9 = [r3], 32; \ + ;; \ + ldf.fill.nta f10 = [r2], 32; \ + ldf.fill.nta f11 = [r3], 32; \ + ;; \ + ldf.fill.nta f12 = [r2], 32; \ + ldf.fill.nta f13 = [r3], 32; \ + ;; \ + ldf.fill.nta f14 = [r2], 32; \ + ldf.fill.nta f15 = [r3], 32; \ + ;; \ + ldf.fill.nta f16 = [r2], 32; \ + ldf.fill.nta f17 = [r3], 32; \ + ;; \ + ldf.fill.nta f18 = [r2], 32; \ + ldf.fill.nta f19 = [r3], 32; \ + ;; \ + ldf.fill.nta f20 = [r2], 32; \ + ldf.fill.nta f21 = [r3], 32; \ + ;; \ + ldf.fill.nta f22 = [r2], 32; \ + ldf.fill.nta f23 = [r3], 32; \ + ;; \ + ldf.fill.nta f24 = [r2], 32; \ + ldf.fill.nta f25 = [r3], 32; \ + ;; \ + ldf.fill.nta f26 = [r2], 32; \ + ldf.fill.nta f27 = [r3], 32; \ + ;; \ + ldf.fill.nta f28 = [r2], 32; \ + ldf.fill.nta f29 = [r3], 32; \ + ;; \ + ldf.fill.nta f30 = [r2], 32; \ + ldf.fill.nta f31 = [r3], 32; \ + ;; + + + + /* + * r33: point to context_t structure + */ +#define RESTORE_FPU_HIGH \ + add r2 = CTX(F32), r33; \ + add r3 = CTX(F33), r33; \ + ;; \ + ldf.fill.nta f32 = [r2], 32; \ + ldf.fill.nta f33 = [r3], 32; \ + ;; \ + ldf.fill.nta f34 = [r2], 32; \ + ldf.fill.nta f35 = [r3], 32; \ + ;; \ + ldf.fill.nta f36 = [r2], 32; \ + ldf.fill.nta f37 = [r3], 32; \ + ;; \ + ldf.fill.nta f38 = [r2], 32; \ + ldf.fill.nta f39 = [r3], 32; \ + ;; \ + ldf.fill.nta f40 = [r2], 32; \ + ldf.fill.nta f41 = [r3], 32; \ + ;; \ + ldf.fill.nta f42 = [r2], 32; \ + ldf.fill.nta f43 = [r3], 32; \ + ;; \ + ldf.fill.nta f44 = [r2], 32; \ + ldf.fill.nta f45 = [r3], 32; \ + ;; \ + ldf.fill.nta f46 = [r2], 32; \ + ldf.fill.nta f47 = [r3], 32; \ + ;; \ + ldf.fill.nta f48 = [r2], 32; \ + ldf.fill.nta f49 = [r3], 32; \ + ;; \ + ldf.fill.nta f50 = [r2], 32; \ + ldf.fill.nta f51 = [r3], 32; \ + ;; \ + ldf.fill.nta f52 = [r2], 32; \ + ldf.fill.nta f53 = [r3], 32; \ + ;; \ + ldf.fill.nta f54 = [r2], 32; \ + ldf.fill.nta f55 = [r3], 32; \ + ;; \ + ldf.fill.nta f56 = [r2], 32; \ + ldf.fill.nta f57 = [r3], 32; \ + ;; \ + ldf.fill.nta f58 = [r2], 32; \ + ldf.fill.nta f59 = [r3], 32; \ + ;; \ + ldf.fill.nta f60 = [r2], 32; \ + ldf.fill.nta f61 = [r3], 32; \ + ;; \ + ldf.fill.nta f62 = [r2], 32; \ + ldf.fill.nta f63 = [r3], 32; \ + ;; \ + ldf.fill.nta f64 = [r2], 32; \ + ldf.fill.nta f65 = [r3], 32; \ + ;; \ + ldf.fill.nta f66 = [r2], 32; \ + ldf.fill.nta f67 = [r3], 32; \ + ;; \ + ldf.fill.nta f68 = [r2], 32; \ + ldf.fill.nta f69 = [r3], 32; \ + ;; \ + ldf.fill.nta f70 = [r2], 32; \ + ldf.fill.nta f71 = [r3], 32; \ + ;; \ + ldf.fill.nta f72 = [r2], 32; \ + ldf.fill.nta f73 = [r3], 32; \ + ;; \ + ldf.fill.nta f74 = [r2], 32; \ + ldf.fill.nta f75 = [r3], 32; \ + ;; \ + ldf.fill.nta f76 = [r2], 32; \ + ldf.fill.nta f77 = [r3], 32; \ + ;; \ + ldf.fill.nta f78 = [r2], 32; \ + ldf.fill.nta f79 = [r3], 32; \ + ;; \ + ldf.fill.nta f80 = [r2], 32; \ + ldf.fill.nta f81 = [r3], 32; \ + ;; \ + ldf.fill.nta f82 = [r2], 32; \ + ldf.fill.nta f83 = [r3], 32; \ + ;; \ + ldf.fill.nta f84 = [r2], 32; \ + ldf.fill.nta f85 = [r3], 32; \ + ;; \ + ldf.fill.nta f86 = [r2], 32; \ + ldf.fill.nta f87 = [r3], 32; \ + ;; \ + ldf.fill.nta f88 = [r2], 32; \ + ldf.fill.nta f89 = [r3], 32; \ + ;; \ + ldf.fill.nta f90 = [r2], 32; \ + ldf.fill.nta f91 = [r3], 32; \ + ;; \ + ldf.fill.nta f92 = [r2], 32; \ + ldf.fill.nta f93 = [r3], 32; \ + ;; \ + ldf.fill.nta f94 = [r2], 32; \ + ldf.fill.nta f95 = [r3], 32; \ + ;; \ + ldf.fill.nta f96 = [r2], 32; \ + ldf.fill.nta f97 = [r3], 32; \ + ;; \ + ldf.fill.nta f98 = [r2], 32; \ + ldf.fill.nta f99 = [r3], 32; \ + ;; \ + ldf.fill.nta f100 = [r2], 32; \ + ldf.fill.nta f101 = [r3], 32; \ + ;; \ + ldf.fill.nta f102 = [r2], 32; \ + ldf.fill.nta f103 = [r3], 32; \ + ;; \ + ldf.fill.nta f104 = [r2], 32; \ + ldf.fill.nta f105 = [r3], 32; \ + ;; \ + ldf.fill.nta f106 = [r2], 32; \ + ldf.fill.nta f107 = [r3], 32; \ + ;; \ + ldf.fill.nta f108 = [r2], 32; \ + ldf.fill.nta f109 = [r3], 32; \ + ;; \ + ldf.fill.nta f110 = [r2], 32; \ + ldf.fill.nta f111 = [r3], 32; \ + ;; \ + ldf.fill.nta f112 = [r2], 32; \ + ldf.fill.nta f113 = [r3], 32; \ + ;; \ + ldf.fill.nta f114 = [r2], 32; \ + ldf.fill.nta f115 = [r3], 32; \ + ;; \ + ldf.fill.nta f116 = [r2], 32; \ + ldf.fill.nta f117 = [r3], 32; \ + ;; \ + ldf.fill.nta f118 = [r2], 32; \ + ldf.fill.nta f119 = [r3], 32; \ + ;; \ + ldf.fill.nta f120 = [r2], 32; \ + ldf.fill.nta f121 = [r3], 32; \ + ;; \ + ldf.fill.nta f122 = [r2], 32; \ + ldf.fill.nta f123 = [r3], 32; \ + ;; \ + ldf.fill.nta f124 = [r2], 32; \ + ldf.fill.nta f125 = [r3], 32; \ + ;; \ + ldf.fill.nta f126 = [r2], 32; \ + ldf.fill.nta f127 = [r3], 32; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_PTK_REGS \ + add r2=CTX(PKR0), r32; \ + mov r16=7; \ + ;; \ + mov ar.lc=r16; \ + mov r17=r0; \ + ;; \ +1: \ + mov r18=pkr[r17]; \ + ;; \ + srlz.i; \ + ;; \ + st8 [r2]=r18, 8; \ + ;; \ + add r17 =1,r17; \ + ;; \ + br.cloop.sptk 1b; \ + ;; + +/* + * r33: point to context_t structure + * ar.lc are corrupted. + */ +#define RESTORE_PTK_REGS \ + add r2=CTX(PKR0), r33; \ + mov r16=7; \ + ;; \ + mov ar.lc=r16; \ + mov r17=r0; \ + ;; \ +1: \ + ld8 r18=[r2], 8; \ + ;; \ + mov pkr[r17]=r18; \ + ;; \ + srlz.i; \ + ;; \ + add r17 =1,r17; \ + ;; \ + br.cloop.sptk 1b; \ + ;; + + +/* + * void vmm_trampoline( context_t * from, + * context_t * to) + * + * from: r32 + * to: r33 + * note: interrupt disabled before call this function. + */ +GLOBAL_ENTRY(vmm_trampoline) + mov r16 = psr + adds r2 = CTX(PSR), r32 + ;; + st8 [r2] = r16, 8 // psr + mov r17 = pr + ;; + st8 [r2] = r17, 8 // pr + mov r18 = ar.unat + ;; + st8 [r2] = r18 + mov r17 = ar.rsc + ;; + adds r2 = CTX(RSC),r32 + ;; + st8 [r2]= r17 + mov ar.rsc =0 + flushrs + ;; + SAVE_GENERAL_REGS + ;; + SAVE_KERNEL_REGS + ;; + SAVE_APP_REGS + ;; + SAVE_BRANCH_REGS + ;; + SAVE_CTL_REGS + ;; + SAVE_REGION_REGS + ;; + //SAVE_DEBUG_REGS + ;; + rsm psr.dfl + ;; + srlz.d + ;; + SAVE_FPU_LOW + ;; + rsm psr.dfh + ;; + srlz.d + ;; + SAVE_FPU_HIGH + ;; + SAVE_PTK_REGS + ;; + RESTORE_PTK_REGS + ;; + RESTORE_FPU_HIGH + ;; + RESTORE_FPU_LOW + ;; + //RESTORE_DEBUG_REGS + ;; + RESTORE_REGION_REGS + ;; + RESTORE_CTL_REGS + ;; + RESTORE_BRANCH_REGS + ;; + RESTORE_APP_REGS + ;; + RESTORE_KERNEL_REGS + ;; + RESTORE_GENERAL_REGS + ;; + adds r2=CTX(PSR), r33 + ;; + ld8 r16=[r2], 8 // psr + ;; + mov psr.l=r16 + ;; + srlz.d + ;; + ld8 r16=[r2], 8 // pr + ;; + mov pr =r16,-1 + ld8 r16=[r2] // unat + ;; + mov ar.unat=r16 + ;; + adds r2=CTX(RSC),r33 + ;; + ld8 r16 =[r2] + ;; + mov ar.rsc = r16 + ;; + br.ret.sptk.few b0 +END(vmm_trampoline) diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c new file mode 100644 index 00000000000..e44027ce566 --- /dev/null +++ b/arch/ia64/kvm/vcpu.c @@ -0,0 +1,2163 @@ +/* + * kvm_vcpu.c: handling all virtual cpu related thing. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Shaofan Li (Susue Li) <susie.li@intel.com> + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang <xiantao.zhang@intel.com> + */ + +#include <linux/kvm_host.h> +#include <linux/types.h> + +#include <asm/processor.h> +#include <asm/ia64regs.h> +#include <asm/gcc_intrin.h> +#include <asm/kregs.h> +#include <asm/pgtable.h> +#include <asm/tlb.h> + +#include "asm-offsets.h" +#include "vcpu.h" + +/* + * Special notes: + * - Index by it/dt/rt sequence + * - Only existing mode transitions are allowed in this table + * - RSE is placed at lazy mode when emulating guest partial mode + * - If gva happens to be rr0 and rr4, only allowed case is identity + * mapping (gva=gpa), or panic! (How?) + */ +int mm_switch_table[8][8] = { + /* 2004/09/12(Kevin): Allow switch to self */ + /* + * (it,dt,rt): (0,0,0) -> (1,1,1) + * This kind of transition usually occurs in the very early + * stage of Linux boot up procedure. Another case is in efi + * and pal calls. (see "arch/ia64/kernel/head.S") + * + * (it,dt,rt): (0,0,0) -> (0,1,1) + * This kind of transition is found when OSYa exits efi boot + * service. Due to gva = gpa in this case (Same region), + * data access can be satisfied though itlb entry for physical + * emulation is hit. + */ + {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + /* + * (it,dt,rt): (0,1,1) -> (1,1,1) + * This kind of transition is found in OSYa. + * + * (it,dt,rt): (0,1,1) -> (0,0,0) + * This kind of transition is found in OSYa + */ + {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V}, + /* (1,0,0)->(1,1,1) */ + {0, 0, 0, 0, 0, 0, 0, SW_P2V}, + /* + * (it,dt,rt): (1,0,1) -> (1,1,1) + * This kind of transition usually occurs when Linux returns + * from the low level TLB miss handlers. + * (see "arch/ia64/kernel/ivt.S") + */ + {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V}, + {0, 0, 0, 0, 0, 0, 0, 0}, + /* + * (it,dt,rt): (1,1,1) -> (1,0,1) + * This kind of transition usually occurs in Linux low level + * TLB miss handler. (see "arch/ia64/kernel/ivt.S") + * + * (it,dt,rt): (1,1,1) -> (0,0,0) + * This kind of transition usually occurs in pal and efi calls, + * which requires running in physical mode. + * (see "arch/ia64/kernel/head.S") + * (1,1,1)->(1,0,0) + */ + + {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF}, +}; + +void physical_mode_init(struct kvm_vcpu *vcpu) +{ + vcpu->arch.mode_flags = GUEST_IN_PHY; +} + +void switch_to_physical_rid(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + /* Save original virtual mode rr[0] and rr[4] */ + psr = ia64_clear_ic(); + ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4); + ia64_srlz_d(); + + ia64_set_psr(psr); + return; +} + + +void switch_to_virtual_rid(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + psr = ia64_clear_ic(); + ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4); + ia64_srlz_d(); + ia64_set_psr(psr); + return; +} + +static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr) +{ + return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)]; +} + +void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, + struct ia64_psr new_psr) +{ + int act; + act = mm_switch_action(old_psr, new_psr); + switch (act) { + case SW_V2P: + /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n", + old_psr.val, new_psr.val);*/ + switch_to_physical_rid(vcpu); + /* + * Set rse to enforced lazy, to prevent active rse + *save/restor when guest physical mode. + */ + vcpu->arch.mode_flags |= GUEST_IN_PHY; + break; + case SW_P2V: + switch_to_virtual_rid(vcpu); + /* + * recover old mode which is saved when entering + * guest physical mode + */ + vcpu->arch.mode_flags &= ~GUEST_IN_PHY; + break; + case SW_SELF: + break; + case SW_NOP: + break; + default: + /* Sanity check */ + break; + } + return; +} + + + +/* + * In physical mode, insert tc/tr for region 0 and 4 uses + * RID[0] and RID[4] which is for physical mode emulation. + * However what those inserted tc/tr wants is rid for + * virtual mode. So original virtual rid needs to be restored + * before insert. + * + * Operations which required such switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All above needs actual virtual rid for destination entry. + */ + +void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, + struct ia64_psr new_psr) +{ + + if ((old_psr.dt != new_psr.dt) + || (old_psr.it != new_psr.it) + || (old_psr.rt != new_psr.rt)) + switch_mm_mode(vcpu, old_psr, new_psr); + + return; +} + + +/* + * In physical mode, insert tc/tr for region 0 and 4 uses + * RID[0] and RID[4] which is for physical mode emulation. + * However what those inserted tc/tr wants is rid for + * virtual mode. So original virtual rid needs to be restored + * before insert. + * + * Operations which required such switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All above needs actual virtual rid for destination entry. + */ + +void prepare_if_physical_mode(struct kvm_vcpu *vcpu) +{ + if (is_physical_mode(vcpu)) { + vcpu->arch.mode_flags |= GUEST_PHY_EMUL; + switch_to_virtual_rid(vcpu); + } + return; +} + +/* Recover always follows prepare */ +void recover_if_physical_mode(struct kvm_vcpu *vcpu) +{ + if (is_physical_mode(vcpu)) + switch_to_physical_rid(vcpu); + vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL; + return; +} + +#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x) + +static u16 gr_info[32] = { + 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ + RPT(r1), RPT(r2), RPT(r3), + RPT(r4), RPT(r5), RPT(r6), RPT(r7), + RPT(r8), RPT(r9), RPT(r10), RPT(r11), + RPT(r12), RPT(r13), RPT(r14), RPT(r15), + RPT(r16), RPT(r17), RPT(r18), RPT(r19), + RPT(r20), RPT(r21), RPT(r22), RPT(r23), + RPT(r24), RPT(r25), RPT(r26), RPT(r27), + RPT(r28), RPT(r29), RPT(r30), RPT(r31) +}; + +#define IA64_FIRST_STACKED_GR 32 +#define IA64_FIRST_ROTATING_FR 32 + +static inline unsigned long +rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg) +{ + reg += rrb; + if (reg >= sor) + reg -= sor; + return reg; +} + +/* + * Return the (rotated) index for floating point register + * be in the REGNUM (REGNUM must range from 32-127, + * result is in the range from 0-95. + */ +static inline unsigned long fph_index(struct kvm_pt_regs *regs, + long regnum) +{ + unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; + return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); +} + + +/* + * The inverse of the above: given bspstore and the number of + * registers, calculate ar.bsp. + */ +static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr, + long num_regs) +{ + long delta = ia64_rse_slot_num(addr) + num_regs; + int i = 0; + + if (num_regs < 0) + delta -= 0x3e; + if (delta < 0) { + while (delta <= -0x3f) { + i--; + delta += 0x3f; + } + } else { + while (delta >= 0x3f) { + i++; + delta -= 0x3f; + } + } + + return addr + num_regs + i; +} + +static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, + unsigned long *val, int *nat) +{ + unsigned long *bsp, *addr, *rnat_addr, *bspstore; + unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; + unsigned long nat_mask; + unsigned long old_rsc, new_rsc; + long sof = (regs->cr_ifs) & 0x7f; + long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + old_rsc = ia64_getreg(_IA64_REG_AR_RSC); + new_rsc = old_rsc&(~(0x3)); + ia64_setreg(_IA64_REG_AR_RSC, new_rsc); + + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + bsp = kbs + (regs->loadrs >> 19); + + addr = kvm_rse_skip_regs(bsp, -sof + ridx); + nat_mask = 1UL << ia64_rse_slot_num(addr); + rnat_addr = ia64_rse_rnat_addr(addr); + + if (addr >= bspstore) { + ia64_flushrs(); + ia64_mf(); + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + } + *val = *addr; + if (nat) { + if (bspstore < rnat_addr) + *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT) + & nat_mask); + else + *nat = (int)!!((*rnat_addr) & nat_mask); + ia64_setreg(_IA64_REG_AR_RSC, old_rsc); + } +} + +void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, + unsigned long val, unsigned long nat) +{ + unsigned long *bsp, *bspstore, *addr, *rnat_addr; + unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; + unsigned long nat_mask; + unsigned long old_rsc, new_rsc, psr; + unsigned long rnat; + long sof = (regs->cr_ifs) & 0x7f; + long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + old_rsc = ia64_getreg(_IA64_REG_AR_RSC); + /* put RSC to lazy mode, and set loadrs 0 */ + new_rsc = old_rsc & (~0x3fff0003); + ia64_setreg(_IA64_REG_AR_RSC, new_rsc); + bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */ + + addr = kvm_rse_skip_regs(bsp, -sof + ridx); + nat_mask = 1UL << ia64_rse_slot_num(addr); + rnat_addr = ia64_rse_rnat_addr(addr); + + local_irq_save(psr); + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + if (addr >= bspstore) { + + ia64_flushrs(); + ia64_mf(); + *addr = val; + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + rnat = ia64_getreg(_IA64_REG_AR_RNAT); + if (bspstore < rnat_addr) + rnat = rnat & (~nat_mask); + else + *rnat_addr = (*rnat_addr)&(~nat_mask); + + ia64_mf(); + ia64_loadrs(); + ia64_setreg(_IA64_REG_AR_RNAT, rnat); + } else { + rnat = ia64_getreg(_IA64_REG_AR_RNAT); + *addr = val; + if (bspstore < rnat_addr) + rnat = rnat&(~nat_mask); + else + *rnat_addr = (*rnat_addr) & (~nat_mask); + + ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore); + ia64_setreg(_IA64_REG_AR_RNAT, rnat); + } + local_irq_restore(psr); + ia64_setreg(_IA64_REG_AR_RSC, old_rsc); +} + +void getreg(unsigned long regnum, unsigned long *val, + int *nat, struct kvm_pt_regs *regs) +{ + unsigned long addr, *unat; + if (regnum >= IA64_FIRST_STACKED_GR) { + get_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + addr = (unsigned long)regs; + unat = ®s->eml_unat;; + + addr += gr_info[regnum]; + + *val = *(unsigned long *)addr; + /* + * do it only when requested + */ + if (nat) + *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL; +} + +void setreg(unsigned long regnum, unsigned long val, + int nat, struct kvm_pt_regs *regs) +{ + unsigned long addr; + unsigned long bitmask; + unsigned long *unat; + + /* + * First takes care of stacked registers + */ + if (regnum >= IA64_FIRST_STACKED_GR) { + set_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + addr = (unsigned long)regs; + unat = ®s->eml_unat; + /* + * add offset from base of struct + * and do it ! + */ + addr += gr_info[regnum]; + + *(unsigned long *)addr = val; + + /* + * We need to clear the corresponding UNAT bit to fully emulate the load + * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 + */ + bitmask = 1UL << ((addr >> 3) & 0x3f); + if (nat) + *unat |= bitmask; + else + *unat &= ~bitmask; + +} + +u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + u64 val; + + if (!reg) + return 0; + getreg(reg, &val, 0, regs); + return val; +} + +void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + long sof = (regs->cr_ifs) & 0x7f; + + if (!reg) + return; + if (reg >= sof + 32) + return; + setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/ +} + +void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, + struct kvm_pt_regs *regs) +{ + /* Take floating register rotation into consideration*/ + if (regnum >= IA64_FIRST_ROTATING_FR) + regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); +#define CASE_FIXED_FP(reg) \ + case (reg) : \ + ia64_stf_spill(fpval, reg); \ + break + + switch (regnum) { + CASE_FIXED_FP(0); + CASE_FIXED_FP(1); + CASE_FIXED_FP(2); + CASE_FIXED_FP(3); + CASE_FIXED_FP(4); + CASE_FIXED_FP(5); + + CASE_FIXED_FP(6); + CASE_FIXED_FP(7); + CASE_FIXED_FP(8); + CASE_FIXED_FP(9); + CASE_FIXED_FP(10); + CASE_FIXED_FP(11); + + CASE_FIXED_FP(12); + CASE_FIXED_FP(13); + CASE_FIXED_FP(14); + CASE_FIXED_FP(15); + CASE_FIXED_FP(16); + CASE_FIXED_FP(17); + CASE_FIXED_FP(18); + CASE_FIXED_FP(19); + CASE_FIXED_FP(20); + CASE_FIXED_FP(21); + CASE_FIXED_FP(22); + CASE_FIXED_FP(23); + CASE_FIXED_FP(24); + CASE_FIXED_FP(25); + CASE_FIXED_FP(26); + CASE_FIXED_FP(27); + CASE_FIXED_FP(28); + CASE_FIXED_FP(29); + CASE_FIXED_FP(30); + CASE_FIXED_FP(31); + CASE_FIXED_FP(32); + CASE_FIXED_FP(33); + CASE_FIXED_FP(34); + CASE_FIXED_FP(35); + CASE_FIXED_FP(36); + CASE_FIXED_FP(37); + CASE_FIXED_FP(38); + CASE_FIXED_FP(39); + CASE_FIXED_FP(40); + CASE_FIXED_FP(41); + CASE_FIXED_FP(42); + CASE_FIXED_FP(43); + CASE_FIXED_FP(44); + CASE_FIXED_FP(45); + CASE_FIXED_FP(46); + CASE_FIXED_FP(47); + CASE_FIXED_FP(48); + CASE_FIXED_FP(49); + CASE_FIXED_FP(50); + CASE_FIXED_FP(51); + CASE_FIXED_FP(52); + CASE_FIXED_FP(53); + CASE_FIXED_FP(54); + CASE_FIXED_FP(55); + CASE_FIXED_FP(56); + CASE_FIXED_FP(57); + CASE_FIXED_FP(58); + CASE_FIXED_FP(59); + CASE_FIXED_FP(60); + CASE_FIXED_FP(61); + CASE_FIXED_FP(62); + CASE_FIXED_FP(63); + CASE_FIXED_FP(64); + CASE_FIXED_FP(65); + CASE_FIXED_FP(66); + CASE_FIXED_FP(67); + CASE_FIXED_FP(68); + CASE_FIXED_FP(69); + CASE_FIXED_FP(70); + CASE_FIXED_FP(71); + CASE_FIXED_FP(72); + CASE_FIXED_FP(73); + CASE_FIXED_FP(74); + CASE_FIXED_FP(75); + CASE_FIXED_FP(76); + CASE_FIXED_FP(77); + CASE_FIXED_FP(78); + CASE_FIXED_FP(79); + CASE_FIXED_FP(80); + CASE_FIXED_FP(81); + CASE_FIXED_FP(82); + CASE_FIXED_FP(83); + CASE_FIXED_FP(84); + CASE_FIXED_FP(85); + CASE_FIXED_FP(86); + CASE_FIXED_FP(87); + CASE_FIXED_FP(88); + CASE_FIXED_FP(89); + CASE_FIXED_FP(90); + CASE_FIXED_FP(91); + CASE_FIXED_FP(92); + CASE_FIXED_FP(93); + CASE_FIXED_FP(94); + CASE_FIXED_FP(95); + CASE_FIXED_FP(96); + CASE_FIXED_FP(97); + CASE_FIXED_FP(98); + CASE_FIXED_FP(99); + CASE_FIXED_FP(100); + CASE_FIXED_FP(101); + CASE_FIXED_FP(102); + CASE_FIXED_FP(103); + CASE_FIXED_FP(104); + CASE_FIXED_FP(105); + CASE_FIXED_FP(106); + CASE_FIXED_FP(107); + CASE_FIXED_FP(108); + CASE_FIXED_FP(109); + CASE_FIXED_FP(110); + CASE_FIXED_FP(111); + CASE_FIXED_FP(112); + CASE_FIXED_FP(113); + CASE_FIXED_FP(114); + CASE_FIXED_FP(115); + CASE_FIXED_FP(116); + CASE_FIXED_FP(117); + CASE_FIXED_FP(118); + CASE_FIXED_FP(119); + CASE_FIXED_FP(120); + CASE_FIXED_FP(121); + CASE_FIXED_FP(122); + CASE_FIXED_FP(123); + CASE_FIXED_FP(124); + CASE_FIXED_FP(125); + CASE_FIXED_FP(126); + CASE_FIXED_FP(127); + } +#undef CASE_FIXED_FP +} + +void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, + struct kvm_pt_regs *regs) +{ + /* Take floating register rotation into consideration*/ + if (regnum >= IA64_FIRST_ROTATING_FR) + regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); + +#define CASE_FIXED_FP(reg) \ + case (reg) : \ + ia64_ldf_fill(reg, fpval); \ + break + + switch (regnum) { + CASE_FIXED_FP(2); + CASE_FIXED_FP(3); + CASE_FIXED_FP(4); + CASE_FIXED_FP(5); + + CASE_FIXED_FP(6); + CASE_FIXED_FP(7); + CASE_FIXED_FP(8); + CASE_FIXED_FP(9); + CASE_FIXED_FP(10); + CASE_FIXED_FP(11); + + CASE_FIXED_FP(12); + CASE_FIXED_FP(13); + CASE_FIXED_FP(14); + CASE_FIXED_FP(15); + CASE_FIXED_FP(16); + CASE_FIXED_FP(17); + CASE_FIXED_FP(18); + CASE_FIXED_FP(19); + CASE_FIXED_FP(20); + CASE_FIXED_FP(21); + CASE_FIXED_FP(22); + CASE_FIXED_FP(23); + CASE_FIXED_FP(24); + CASE_FIXED_FP(25); + CASE_FIXED_FP(26); + CASE_FIXED_FP(27); + CASE_FIXED_FP(28); + CASE_FIXED_FP(29); + CASE_FIXED_FP(30); + CASE_FIXED_FP(31); + CASE_FIXED_FP(32); + CASE_FIXED_FP(33); + CASE_FIXED_FP(34); + CASE_FIXED_FP(35); + CASE_FIXED_FP(36); + CASE_FIXED_FP(37); + CASE_FIXED_FP(38); + CASE_FIXED_FP(39); + CASE_FIXED_FP(40); + CASE_FIXED_FP(41); + CASE_FIXED_FP(42); + CASE_FIXED_FP(43); + CASE_FIXED_FP(44); + CASE_FIXED_FP(45); + CASE_FIXED_FP(46); + CASE_FIXED_FP(47); + CASE_FIXED_FP(48); + CASE_FIXED_FP(49); + CASE_FIXED_FP(50); + CASE_FIXED_FP(51); + CASE_FIXED_FP(52); + CASE_FIXED_FP(53); + CASE_FIXED_FP(54); + CASE_FIXED_FP(55); + CASE_FIXED_FP(56); + CASE_FIXED_FP(57); + CASE_FIXED_FP(58); + CASE_FIXED_FP(59); + CASE_FIXED_FP(60); + CASE_FIXED_FP(61); + CASE_FIXED_FP(62); + CASE_FIXED_FP(63); + CASE_FIXED_FP(64); + CASE_FIXED_FP(65); + CASE_FIXED_FP(66); + CASE_FIXED_FP(67); + CASE_FIXED_FP(68); + CASE_FIXED_FP(69); + CASE_FIXED_FP(70); + CASE_FIXED_FP(71); + CASE_FIXED_FP(72); + CASE_FIXED_FP(73); + CASE_FIXED_FP(74); + CASE_FIXED_FP(75); + CASE_FIXED_FP(76); + CASE_FIXED_FP(77); + CASE_FIXED_FP(78); + CASE_FIXED_FP(79); + CASE_FIXED_FP(80); + CASE_FIXED_FP(81); + CASE_FIXED_FP(82); + CASE_FIXED_FP(83); + CASE_FIXED_FP(84); + CASE_FIXED_FP(85); + CASE_FIXED_FP(86); + CASE_FIXED_FP(87); + CASE_FIXED_FP(88); + CASE_FIXED_FP(89); + CASE_FIXED_FP(90); + CASE_FIXED_FP(91); + CASE_FIXED_FP(92); + CASE_FIXED_FP(93); + CASE_FIXED_FP(94); + CASE_FIXED_FP(95); + CASE_FIXED_FP(96); + CASE_FIXED_FP(97); + CASE_FIXED_FP(98); + CASE_FIXED_FP(99); + CASE_FIXED_FP(100); + CASE_FIXED_FP(101); + CASE_FIXED_FP(102); + CASE_FIXED_FP(103); + CASE_FIXED_FP(104); + CASE_FIXED_FP(105); + CASE_FIXED_FP(106); + CASE_FIXED_FP(107); + CASE_FIXED_FP(108); + CASE_FIXED_FP(109); + CASE_FIXED_FP(110); + CASE_FIXED_FP(111); + CASE_FIXED_FP(112); + CASE_FIXED_FP(113); + CASE_FIXED_FP(114); + CASE_FIXED_FP(115); + CASE_FIXED_FP(116); + CASE_FIXED_FP(117); + CASE_FIXED_FP(118); + CASE_FIXED_FP(119); + CASE_FIXED_FP(120); + CASE_FIXED_FP(121); + CASE_FIXED_FP(122); + CASE_FIXED_FP(123); + CASE_FIXED_FP(124); + CASE_FIXED_FP(125); + CASE_FIXED_FP(126); + CASE_FIXED_FP(127); + } +} + +void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, + struct ia64_fpreg *val) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + getfpreg(reg, val, regs); /* FIXME: handle NATs later*/ +} + +void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, + struct ia64_fpreg *val) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + if (reg > 1) + setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ +} + +/************************************************************************ + * lsapic timer + ***********************************************************************/ +u64 vcpu_get_itc(struct kvm_vcpu *vcpu) +{ + unsigned long guest_itc; + guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC); + + if (guest_itc >= VMX(vcpu, last_itc)) { + VMX(vcpu, last_itc) = guest_itc; + return guest_itc; + } else + return VMX(vcpu, last_itc); +} + +static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val); +static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) +{ + struct kvm_vcpu *v; + int i; + long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC); + unsigned long vitv = VCPU(vcpu, itv); + + if (vcpu->vcpu_id == 0) { + for (i = 0; i < MAX_VCPU_NUM; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + VMX(v, itc_offset) = itc_offset; + VMX(v, last_itc) = 0; + } + } + VMX(vcpu, last_itc) = 0; + if (VCPU(vcpu, itm) <= val) { + VMX(vcpu, itc_check) = 0; + vcpu_unpend_interrupt(vcpu, vitv); + } else { + VMX(vcpu, itc_check) = 1; + vcpu_set_itm(vcpu, VCPU(vcpu, itm)); + } + +} + +static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, itm)); +} + +static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val) +{ + unsigned long vitv = VCPU(vcpu, itv); + VCPU(vcpu, itm) = val; + + if (val > vcpu_get_itc(vcpu)) { + VMX(vcpu, itc_check) = 1; + vcpu_unpend_interrupt(vcpu, vitv); + VMX(vcpu, timer_pending) = 0; + } else + VMX(vcpu, itc_check) = 0; +} + +#define ITV_VECTOR(itv) (itv&0xff) +#define ITV_IRQ_MASK(itv) (itv&(1<<16)) + +static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, itv) = val; + if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) { + vcpu_pend_interrupt(vcpu, ITV_VECTOR(val)); + vcpu->arch.timer_pending = 0; + } +} + +static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val) +{ + int vec; + + vec = highest_inservice_irq(vcpu); + if (vec == NULL_VECTOR) + return; + VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63)); + VCPU(vcpu, eoi) = 0; + vcpu->arch.irq_new_pending = 1; + +} + +/* See Table 5-8 in SDM vol2 for the definition */ +int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice) +{ + union ia64_tpr vtpr; + + vtpr.val = VCPU(vcpu, tpr); + + if (h_inservice == NMI_VECTOR) + return IRQ_MASKED_BY_INSVC; + + if (h_pending == NMI_VECTOR) { + /* Non Maskable Interrupt */ + return IRQ_NO_MASKED; + } + + if (h_inservice == ExtINT_VECTOR) + return IRQ_MASKED_BY_INSVC; + + if (h_pending == ExtINT_VECTOR) { + if (vtpr.mmi) { + /* mask all external IRQ */ + return IRQ_MASKED_BY_VTPR; + } else + return IRQ_NO_MASKED; + } + + if (is_higher_irq(h_pending, h_inservice)) { + if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4))) + return IRQ_NO_MASKED; + else + return IRQ_MASKED_BY_VTPR; + } else { + return IRQ_MASKED_BY_INSVC; + } +} + +void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec) +{ + long spsr; + int ret; + + local_irq_save(spsr); + ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0])); + local_irq_restore(spsr); + + vcpu->arch.irq_new_pending = 1; +} + +void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec) +{ + long spsr; + int ret; + + local_irq_save(spsr); + ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0])); + local_irq_restore(spsr); + if (ret) { + vcpu->arch.irq_new_pending = 1; + wmb(); + } +} + +void update_vhpi(struct kvm_vcpu *vcpu, int vec) +{ + u64 vhpi; + + if (vec == NULL_VECTOR) + vhpi = 0; + else if (vec == NMI_VECTOR) + vhpi = 32; + else if (vec == ExtINT_VECTOR) + vhpi = 16; + else + vhpi = vec >> 4; + + VCPU(vcpu, vhpi) = vhpi; + if (VCPU(vcpu, vac).a_int) + ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT, + (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0); +} + +u64 vcpu_get_ivr(struct kvm_vcpu *vcpu) +{ + int vec, h_inservice, mask; + + vec = highest_pending_irq(vcpu); + h_inservice = highest_inservice_irq(vcpu); + mask = irq_masked(vcpu, vec, h_inservice); + if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) { + if (VCPU(vcpu, vhpi)) + update_vhpi(vcpu, NULL_VECTOR); + return IA64_SPURIOUS_INT_VECTOR; + } + if (mask == IRQ_MASKED_BY_VTPR) { + update_vhpi(vcpu, vec); + return IA64_SPURIOUS_INT_VECTOR; + } + VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63)); + vcpu_unpend_interrupt(vcpu, vec); + return (u64)vec; +} + +/************************************************************************** + Privileged operation emulation routines + **************************************************************************/ +u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr) +{ + union ia64_pta vpta; + union ia64_rr vrr; + u64 pval; + u64 vhpt_offset; + + vpta.val = vcpu_get_pta(vcpu); + vrr.val = vcpu_get_rr(vcpu, vadr); + vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1); + if (vpta.vf) { + pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val, + vpta.val, 0, 0, 0, 0); + } else { + pval = (vadr & VRN_MASK) | vhpt_offset | + (vpta.val << 3 >> (vpta.size + 3) << (vpta.size)); + } + return pval; +} + +u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr) +{ + union ia64_rr vrr; + union ia64_pta vpta; + u64 pval; + + vpta.val = vcpu_get_pta(vcpu); + vrr.val = vcpu_get_rr(vcpu, vadr); + if (vpta.vf) { + pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val, + 0, 0, 0, 0, 0); + } else + pval = 1; + + return pval; +} + +u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr) +{ + struct thash_data *data; + union ia64_pta vpta; + u64 key; + + vpta.val = vcpu_get_pta(vcpu); + if (vpta.vf == 0) { + key = 1; + return key; + } + data = vtlb_lookup(vcpu, vadr, D_TLB); + if (!data || !data->p) + key = 1; + else + key = data->key; + + return key; +} + + + +void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long thash, vadr; + + vadr = vcpu_get_gr(vcpu, inst.M46.r3); + thash = vcpu_thash(vcpu, vadr); + vcpu_set_gr(vcpu, inst.M46.r1, thash, 0); +} + + +void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long tag, vadr; + + vadr = vcpu_get_gr(vcpu, inst.M46.r3); + tag = vcpu_ttag(vcpu, vadr); + vcpu_set_gr(vcpu, inst.M46.r1, tag, 0); +} + +int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr) +{ + struct thash_data *data; + union ia64_isr visr, pt_isr; + struct kvm_pt_regs *regs; + struct ia64_psr vpsr; + + regs = vcpu_regs(vcpu); + pt_isr.val = VMX(vcpu, cr_isr); + visr.val = 0; + visr.ei = pt_isr.ei; + visr.ir = pt_isr.ir; + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + visr.na = 1; + + data = vhpt_lookup(vadr); + if (data) { + if (data->p == 0) { + vcpu_set_isr(vcpu, visr.val); + data_page_not_present(vcpu, vadr); + return IA64_FAULT; + } else if (data->ma == VA_MATTR_NATPAGE) { + vcpu_set_isr(vcpu, visr.val); + dnat_page_consumption(vcpu, vadr); + return IA64_FAULT; + } else { + *padr = (data->gpaddr >> data->ps << data->ps) | + (vadr & (PSIZE(data->ps) - 1)); + return IA64_NO_FAULT; + } + } + + data = vtlb_lookup(vcpu, vadr, D_TLB); + if (data) { + if (data->p == 0) { + vcpu_set_isr(vcpu, visr.val); + data_page_not_present(vcpu, vadr); + return IA64_FAULT; + } else if (data->ma == VA_MATTR_NATPAGE) { + vcpu_set_isr(vcpu, visr.val); + dnat_page_consumption(vcpu, vadr); + return IA64_FAULT; + } else{ + *padr = ((data->ppn >> (data->ps - 12)) << data->ps) + | (vadr & (PSIZE(data->ps) - 1)); + return IA64_NO_FAULT; + } + } + if (!vhpt_enabled(vcpu, vadr, NA_REF)) { + if (vpsr.ic) { + vcpu_set_isr(vcpu, visr.val); + alt_dtlb(vcpu, vadr); + return IA64_FAULT; + } else { + nested_dtlb(vcpu); + return IA64_FAULT; + } + } else { + if (vpsr.ic) { + vcpu_set_isr(vcpu, visr.val); + dvhpt_fault(vcpu, vadr); + return IA64_FAULT; + } else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + + return IA64_NO_FAULT; +} + + +int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1, r3; + + r3 = vcpu_get_gr(vcpu, inst.M46.r3); + + if (vcpu_tpa(vcpu, r3, &r1)) + return IA64_FAULT; + + vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + +void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1, r3; + + r3 = vcpu_get_gr(vcpu, inst.M46.r3); + r1 = vcpu_tak(vcpu, r3); + vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); +} + + +/************************************ + * Insert/Purge translation register/cache + ************************************/ +void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) +{ + thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB); +} + +void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) +{ + thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB); +} + +void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) +{ + u64 ps, va, rid; + struct thash_data *p_itr; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + pte &= ~PAGE_FLAGS_RV_MASK; + rid = vcpu_get_rr(vcpu, ifa); + rid = rid & RR_RID_MASK; + p_itr = (struct thash_data *)&vcpu->arch.itrs[slot]; + vcpu_set_tr(p_itr, pte, itir, va, rid); + vcpu_quick_region_set(VMX(vcpu, itr_regions), va); +} + + +void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) +{ + u64 gpfn; + u64 ps, va, rid; + struct thash_data *p_dtr; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + pte &= ~PAGE_FLAGS_RV_MASK; + + if (ps != _PAGE_SIZE_16M) + thash_purge_entries(vcpu, va, ps); + gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; + if (__gpfn_is_io(gpfn)) + pte |= VTLB_PTE_IO; + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK; + p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot]; + vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot], + pte, itir, va, rid); + vcpu_quick_region_set(VMX(vcpu, dtr_regions), va); +} + +void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) +{ + int index; + u64 va; + + va = PAGEALIGN(ifa, ps); + while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0) + vcpu->arch.dtrs[index].page_flags = 0; + + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) +{ + int index; + u64 va; + + va = PAGEALIGN(ifa, ps); + while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0) + vcpu->arch.itrs[index].page_flags = 0; + + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + va = PAGEALIGN(va, ps); + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va) +{ + thash_purge_all(vcpu); +} + +void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + long psr; + local_irq_save(psr); + p->exit_reason = EXIT_REASON_PTC_G; + + p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va); + p->u.ptc_g_data.vaddr = va; + p->u.ptc_g_data.ps = ps; + vmm_transition(vcpu); + /* Do Local Purge Here*/ + vcpu_ptc_l(vcpu, va, ps); + local_irq_restore(psr); +} + + +void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + vcpu_ptc_ga(vcpu, va, ps); +} + +void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + vcpu_ptc_e(vcpu, ifa); +} + +void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_g(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_ga(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_l(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptr_d(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptr_i(vcpu, ifa, itir_ps(itir)); +} + +void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte, slot; + + slot = vcpu_get_gr(vcpu, inst.M45.r3); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + vcpu_itr_d(vcpu, slot, pte, itir, ifa); +} + + + +void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte, slot; + + slot = vcpu_get_gr(vcpu, inst.M45.r3); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + vcpu_itr_i(vcpu, slot, pte, itir, ifa); +} + +void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte; + + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_itc_d(vcpu, pte, itir, ifa); +} + +void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte; + + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_itc_i(vcpu, pte, itir, ifa); +} + +/************************************* + * Moves to semi-privileged registers + *************************************/ + +void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long imm; + + if (inst.M30.s) + imm = -inst.M30.imm; + else + imm = inst.M30.imm; + + vcpu_set_itc(vcpu, imm); +} + +void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r2; + + r2 = vcpu_get_gr(vcpu, inst.M29.r2); + vcpu_set_itc(vcpu, r2); +} + + +void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1; + + r1 = vcpu_get_itc(vcpu); + vcpu_set_gr(vcpu, inst.M31.r1, r1, 0); +} +/************************************************************************** + struct kvm_vcpu*protection key register access routines + **************************************************************************/ + +unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg) +{ + return ((unsigned long)ia64_get_pkr(reg)); +} + +void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val) +{ + ia64_set_pkr(reg, val); +} + + +unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa) +{ + union ia64_rr rr, rr1; + + rr.val = vcpu_get_rr(vcpu, ifa); + rr1.val = 0; + rr1.ps = rr.ps; + rr1.rid = rr.rid; + return (rr1.val); +} + + + +/******************************** + * Moves to privileged registers + ********************************/ +unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg, + unsigned long val) +{ + union ia64_rr oldrr, newrr; + unsigned long rrval; + struct exit_ctl_data *p = &vcpu->arch.exit_data; + unsigned long psr; + + oldrr.val = vcpu_get_rr(vcpu, reg); + newrr.val = val; + vcpu->arch.vrr[reg >> VRN_SHIFT] = val; + + switch ((unsigned long)(reg >> VRN_SHIFT)) { + case VRN6: + vcpu->arch.vmm_rr = vrrtomrr(val); + local_irq_save(psr); + p->exit_reason = EXIT_REASON_SWITCH_RR6; + vmm_transition(vcpu); + local_irq_restore(psr); + break; + case VRN4: + rrval = vrrtomrr(val); + vcpu->arch.metaphysical_saved_rr4 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg, rrval); + break; + case VRN0: + rrval = vrrtomrr(val); + vcpu->arch.metaphysical_saved_rr0 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg, rrval); + break; + default: + ia64_set_rr(reg, vrrtomrr(val)); + break; + } + + return (IA64_NO_FAULT); +} + + + +void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_rr(vcpu, r3, r2); +} + +void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst) +{ +} + +void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst) +{ +} + +void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pmc(vcpu, r3, r2); +} + +void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pmd(vcpu, r3, r2); +} + +void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst) +{ + u64 r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pkr(vcpu, r3, r2); +} + + + +void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_rr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_pkr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_dbr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_ibr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_pmc(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + + +unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg) +{ + /* FIXME: This could get called as a result of a rsvd-reg fault */ + if (reg > (ia64_get_cpuid(3) & 0xff)) + return 0; + else + return ia64_get_cpuid(reg); +} + +void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_cpuid(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val) +{ + VCPU(vcpu, tpr) = val; + vcpu->arch.irq_check = 1; +} + +unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r2; + + r2 = vcpu_get_gr(vcpu, inst.M32.r2); + VCPU(vcpu, vcr[inst.M32.cr3]) = r2; + + switch (inst.M32.cr3) { + case 0: + vcpu_set_dcr(vcpu, r2); + break; + case 1: + vcpu_set_itm(vcpu, r2); + break; + case 66: + vcpu_set_tpr(vcpu, r2); + break; + case 67: + vcpu_set_eoi(vcpu, r2); + break; + default: + break; + } + + return 0; +} + + +unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long tgt = inst.M33.r1; + unsigned long val; + + switch (inst.M33.cr3) { + case 65: + val = vcpu_get_ivr(vcpu); + vcpu_set_gr(vcpu, tgt, val, 0); + break; + + case 67: + vcpu_set_gr(vcpu, tgt, 0L, 0); + break; + default: + val = VCPU(vcpu, vcr[inst.M33.cr3]); + vcpu_set_gr(vcpu, tgt, val, 0); + break; + } + + return 0; +} + + + +void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) +{ + + unsigned long mask; + struct kvm_pt_regs *regs; + struct ia64_psr old_psr, new_psr; + + old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + regs = vcpu_regs(vcpu); + /* We only support guest as: + * vpsr.pk = 0 + * vpsr.is = 0 + * Otherwise panic + */ + if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) + panic_vm(vcpu); + + /* + * For those IA64_PSR bits: id/da/dd/ss/ed/ia + * Since these bits will become 0, after success execution of each + * instruction, we will change set them to mIA64_PSR + */ + VCPU(vcpu, vpsr) = val + & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | + IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA)); + + if (!old_psr.i && (val & IA64_PSR_I)) { + /* vpsr.i 0->1 */ + vcpu->arch.irq_check = 1; + } + new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + /* + * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr) + * , except for the following bits: + * ic/i/dt/si/rt/mc/it/bn/vm + */ + mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI + + IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN + + IA64_PSR_VM; + + regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask)); + + check_mm_mode_switch(vcpu, old_psr, new_psr); + + return ; +} + +unsigned long vcpu_cover(struct kvm_vcpu *vcpu) +{ + struct ia64_psr vpsr; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + if (!vpsr.ic) + VCPU(vcpu, ifs) = regs->cr_ifs; + regs->cr_ifs = IA64_IFS_V; + return (IA64_NO_FAULT); +} + + + +/************************************************************************** + VCPU banked general register access routines + **************************************************************************/ +#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ + do { \ + __asm__ __volatile__ ( \ + ";;extr.u %0 = %3,%6,16;;\n" \ + "dep %1 = %0, %1, 0, 16;;\n" \ + "st8 [%4] = %1\n" \ + "extr.u %0 = %2, 16, 16;;\n" \ + "dep %3 = %0, %3, %6, 16;;\n" \ + "st8 [%5] = %3\n" \ + ::"r"(i), "r"(*b1unat), "r"(*b0unat), \ + "r"(*runat), "r"(b1unat), "r"(runat), \ + "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ + } while (0) + +void vcpu_bsw0(struct kvm_vcpu *vcpu) +{ + unsigned long i; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + unsigned long *r = ®s->r16; + unsigned long *b0 = &VCPU(vcpu, vbgr[0]); + unsigned long *b1 = &VCPU(vcpu, vgr[0]); + unsigned long *runat = ®s->eml_unat; + unsigned long *b0unat = &VCPU(vcpu, vbnat); + unsigned long *b1unat = &VCPU(vcpu, vnat); + + + if (VCPU(vcpu, vpsr) & IA64_PSR_BN) { + for (i = 0; i < 16; i++) { + *b1++ = *r; + *r++ = *b0++; + } + vcpu_bsw0_unat(i, b0unat, b1unat, runat, + VMM_PT_REGS_R16_SLOT); + VCPU(vcpu, vpsr) &= ~IA64_PSR_BN; + } +} + +#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ + do { \ + __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \ + "dep %1 = %0, %1, 16, 16;;\n" \ + "st8 [%4] = %1\n" \ + "extr.u %0 = %2, 0, 16;;\n" \ + "dep %3 = %0, %3, %6, 16;;\n" \ + "st8 [%5] = %3\n" \ + ::"r"(i), "r"(*b0unat), "r"(*b1unat), \ + "r"(*runat), "r"(b0unat), "r"(runat), \ + "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ + } while (0) + +void vcpu_bsw1(struct kvm_vcpu *vcpu) +{ + unsigned long i; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + unsigned long *r = ®s->r16; + unsigned long *b0 = &VCPU(vcpu, vbgr[0]); + unsigned long *b1 = &VCPU(vcpu, vgr[0]); + unsigned long *runat = ®s->eml_unat; + unsigned long *b0unat = &VCPU(vcpu, vbnat); + unsigned long *b1unat = &VCPU(vcpu, vnat); + + if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) { + for (i = 0; i < 16; i++) { + *b0++ = *r; + *r++ = *b1++; + } + vcpu_bsw1_unat(i, b0unat, b1unat, runat, + VMM_PT_REGS_R16_SLOT); + VCPU(vcpu, vpsr) |= IA64_PSR_BN; + } +} + + + + +void vcpu_rfi(struct kvm_vcpu *vcpu) +{ + unsigned long ifs, psr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + psr = VCPU(vcpu, ipsr); + if (psr & IA64_PSR_BN) + vcpu_bsw1(vcpu); + else + vcpu_bsw0(vcpu); + vcpu_set_psr(vcpu, psr); + ifs = VCPU(vcpu, ifs); + if (ifs >> 63) + regs->cr_ifs = ifs; + regs->cr_iip = VCPU(vcpu, iip); +} + + +/* + VPSR can't keep track of below bits of guest PSR + This function gets guest PSR + */ + +unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu) +{ + unsigned long mask; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | + IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI; + return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask); +} + +void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long vpsr; + unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21) + | inst.M44.imm; + + vpsr = vcpu_get_psr(vcpu); + vpsr &= (~imm24); + vcpu_set_psr(vcpu, vpsr); +} + +void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long vpsr; + unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) + | inst.M44.imm; + + vpsr = vcpu_get_psr(vcpu); + vpsr |= imm24; + vcpu_set_psr(vcpu, vpsr); +} + +/* Generate Mask + * Parameter: + * bit -- starting bit + * len -- how many bits + */ +#define MASK(bit,len) \ +({ \ + __u64 ret; \ + \ + __asm __volatile("dep %0=-1, r0, %1, %2"\ + : "=r" (ret): \ + "M" (bit), \ + "M" (len)); \ + ret; \ +}) + +void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val) +{ + val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32)); + vcpu_set_psr(vcpu, val); +} + +void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long val; + + val = vcpu_get_gr(vcpu, inst.M35.r2); + vcpu_set_psr_l(vcpu, val); +} + +void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long val; + + val = vcpu_get_psr(vcpu); + val = (val & MASK(0, 32)) | (val & MASK(35, 2)); + vcpu_set_gr(vcpu, inst.M33.r1, val, 0); +} + +void vcpu_increment_iip(struct kvm_vcpu *vcpu) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; + if (ipsr->ri == 2) { + ipsr->ri = 0; + regs->cr_iip += 16; + } else + ipsr->ri++; +} + +void vcpu_decrement_iip(struct kvm_vcpu *vcpu) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; + + if (ipsr->ri == 0) { + ipsr->ri = 2; + regs->cr_iip -= 16; + } else + ipsr->ri--; +} + +/** Emulate a privileged operation. + * + * + * @param vcpu virtual cpu + * @cause the reason cause virtualization fault + * @opcode the instruction code which cause virtualization fault + */ + +void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs) +{ + unsigned long status, cause, opcode ; + INST64 inst; + + status = IA64_NO_FAULT; + cause = VMX(vcpu, cause); + opcode = VMX(vcpu, opcode); + inst.inst = opcode; + /* + * Switch to actual virtual rid in rr0 and rr4, + * which is required by some tlb related instructions. + */ + prepare_if_physical_mode(vcpu); + + switch (cause) { + case EVENT_RSM: + kvm_rsm(vcpu, inst); + break; + case EVENT_SSM: + kvm_ssm(vcpu, inst); + break; + case EVENT_MOV_TO_PSR: + kvm_mov_to_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_PSR: + kvm_mov_from_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_CR: + kvm_mov_from_cr(vcpu, inst); + break; + case EVENT_MOV_TO_CR: + kvm_mov_to_cr(vcpu, inst); + break; + case EVENT_BSW_0: + vcpu_bsw0(vcpu); + break; + case EVENT_BSW_1: + vcpu_bsw1(vcpu); + break; + case EVENT_COVER: + vcpu_cover(vcpu); + break; + case EVENT_RFI: + vcpu_rfi(vcpu); + break; + case EVENT_ITR_D: + kvm_itr_d(vcpu, inst); + break; + case EVENT_ITR_I: + kvm_itr_i(vcpu, inst); + break; + case EVENT_PTR_D: + kvm_ptr_d(vcpu, inst); + break; + case EVENT_PTR_I: + kvm_ptr_i(vcpu, inst); + break; + case EVENT_ITC_D: + kvm_itc_d(vcpu, inst); + break; + case EVENT_ITC_I: + kvm_itc_i(vcpu, inst); + break; + case EVENT_PTC_L: + kvm_ptc_l(vcpu, inst); + break; + case EVENT_PTC_G: + kvm_ptc_g(vcpu, inst); + break; + case EVENT_PTC_GA: + kvm_ptc_ga(vcpu, inst); + break; + case EVENT_PTC_E: + kvm_ptc_e(vcpu, inst); + break; + case EVENT_MOV_TO_RR: + kvm_mov_to_rr(vcpu, inst); + break; + case EVENT_MOV_FROM_RR: + kvm_mov_from_rr(vcpu, inst); + break; + case EVENT_THASH: + kvm_thash(vcpu, inst); + break; + case EVENT_TTAG: + kvm_ttag(vcpu, inst); + break; + case EVENT_TPA: + status = kvm_tpa(vcpu, inst); + break; + case EVENT_TAK: + kvm_tak(vcpu, inst); + break; + case EVENT_MOV_TO_AR_IMM: + kvm_mov_to_ar_imm(vcpu, inst); + break; + case EVENT_MOV_TO_AR: + kvm_mov_to_ar_reg(vcpu, inst); + break; + case EVENT_MOV_FROM_AR: + kvm_mov_from_ar_reg(vcpu, inst); + break; + case EVENT_MOV_TO_DBR: + kvm_mov_to_dbr(vcpu, inst); + break; + case EVENT_MOV_TO_IBR: + kvm_mov_to_ibr(vcpu, inst); + break; + case EVENT_MOV_TO_PMC: + kvm_mov_to_pmc(vcpu, inst); + break; + case EVENT_MOV_TO_PMD: + kvm_mov_to_pmd(vcpu, inst); + break; + case EVENT_MOV_TO_PKR: + kvm_mov_to_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_DBR: + kvm_mov_from_dbr(vcpu, inst); + break; + case EVENT_MOV_FROM_IBR: + kvm_mov_from_ibr(vcpu, inst); + break; + case EVENT_MOV_FROM_PMC: + kvm_mov_from_pmc(vcpu, inst); + break; + case EVENT_MOV_FROM_PKR: + kvm_mov_from_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_CPUID: + kvm_mov_from_cpuid(vcpu, inst); + break; + case EVENT_VMSW: + status = IA64_FAULT; + break; + default: + break; + }; + /*Assume all status is NO_FAULT ?*/ + if (status == IA64_NO_FAULT && cause != EVENT_RFI) + vcpu_increment_iip(vcpu); + + recover_if_physical_mode(vcpu); +} + +void init_vcpu(struct kvm_vcpu *vcpu) +{ + int i; + + vcpu->arch.mode_flags = GUEST_IN_PHY; + VMX(vcpu, vrr[0]) = 0x38; + VMX(vcpu, vrr[1]) = 0x38; + VMX(vcpu, vrr[2]) = 0x38; + VMX(vcpu, vrr[3]) = 0x38; + VMX(vcpu, vrr[4]) = 0x38; + VMX(vcpu, vrr[5]) = 0x38; + VMX(vcpu, vrr[6]) = 0x38; + VMX(vcpu, vrr[7]) = 0x38; + VCPU(vcpu, vpsr) = IA64_PSR_BN; + VCPU(vcpu, dcr) = 0; + /* pta.size must not be 0. The minimum is 15 (32k) */ + VCPU(vcpu, pta) = 15 << 2; + VCPU(vcpu, itv) = 0x10000; + VCPU(vcpu, itm) = 0; + VMX(vcpu, last_itc) = 0; + + VCPU(vcpu, lid) = VCPU_LID(vcpu); + VCPU(vcpu, ivr) = 0; + VCPU(vcpu, tpr) = 0x10000; + VCPU(vcpu, eoi) = 0; + VCPU(vcpu, irr[0]) = 0; + VCPU(vcpu, irr[1]) = 0; + VCPU(vcpu, irr[2]) = 0; + VCPU(vcpu, irr[3]) = 0; + VCPU(vcpu, pmv) = 0x10000; + VCPU(vcpu, cmcv) = 0x10000; + VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */ + VCPU(vcpu, lrr1) = 0x10000; /* default reset value? */ + update_vhpi(vcpu, NULL_VECTOR); + VLSAPIC_XTP(vcpu) = 0x80; /* disabled */ + + for (i = 0; i < 4; i++) + VLSAPIC_INSVC(vcpu, i) = 0; +} + +void kvm_init_all_rr(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + local_irq_save(psr); + + /* WARNING: not allow co-exist of both virtual mode and physical + * mode in same region + */ + + vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0])); + vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4])); + + if (is_physical_mode(vcpu)) { + if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) + panic_vm(vcpu); + + ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); + ia64_dv_serialize_data(); + ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4); + ia64_dv_serialize_data(); + } else { + ia64_set_rr((VRN0 << VRN_SHIFT), + vcpu->arch.metaphysical_saved_rr0); + ia64_dv_serialize_data(); + ia64_set_rr((VRN4 << VRN_SHIFT), + vcpu->arch.metaphysical_saved_rr4); + ia64_dv_serialize_data(); + } + ia64_set_rr((VRN1 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN1]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN2 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN2]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN3 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN3]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN5 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN5]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN7 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN7]))); + ia64_dv_serialize_data(); + ia64_srlz_d(); + ia64_set_psr(psr); +} + +int vmm_entry(void) +{ + struct kvm_vcpu *v; + v = current_vcpu; + + ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd, + 0, 0, 0, 0, 0, 0); + kvm_init_vtlb(v); + kvm_init_vhpt(v); + init_vcpu(v); + kvm_init_all_rr(v); + vmm_reset_entry(); + + return 0; +} + +void panic_vm(struct kvm_vcpu *v) +{ + struct exit_ctl_data *p = &v->arch.exit_data; + + p->exit_reason = EXIT_REASON_VM_PANIC; + vmm_transition(v); + /*Never to return*/ + while (1); +} diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h new file mode 100644 index 00000000000..b0fcfb62c49 --- /dev/null +++ b/arch/ia64/kvm/vcpu.h @@ -0,0 +1,740 @@ +/* + * vcpu.h: vcpu routines + * Copyright (c) 2005, Intel Corporation. + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * + * Copyright (c) 2007, Intel Corporation. + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + + +#ifndef __KVM_VCPU_H__ +#define __KVM_VCPU_H__ + +#include <asm/types.h> +#include <asm/fpu.h> +#include <asm/processor.h> + +#ifndef __ASSEMBLY__ +#include "vti.h" + +#include <linux/kvm_host.h> +#include <linux/spinlock.h> + +typedef unsigned long IA64_INST; + +typedef union U_IA64_BUNDLE { + unsigned long i64[2]; + struct { unsigned long template:5, slot0:41, slot1a:18, + slot1b:23, slot2:41; }; + /* NOTE: following doesn't work because bitfields can't cross natural + size boundaries + struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */ +} IA64_BUNDLE; + +typedef union U_INST64_A5 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5, + imm9d:9, s:1, major:4; }; +} INST64_A5; + +typedef union U_INST64_B4 { + IA64_INST inst; + struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6, + wh:2, d:1, un1:1, major:4; }; +} INST64_B4; + +typedef union U_INST64_B8 { + IA64_INST inst; + struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; }; +} INST64_B8; + +typedef union U_INST64_B9 { + IA64_INST inst; + struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; }; +} INST64_B9; + +typedef union U_INST64_I19 { + IA64_INST inst; + struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; }; +} INST64_I19; + +typedef union U_INST64_I26 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_I26; + +typedef union U_INST64_I27 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; }; +} INST64_I27; + +typedef union U_INST64_I28 { /* not privileged (mov from AR) */ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_I28; + +typedef union U_INST64_M28 { + IA64_INST inst; + struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M28; + +typedef union U_INST64_M29 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M29; + +typedef union U_INST64_M30 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2, + x3:3, s:1, major:4; }; +} INST64_M30; + +typedef union U_INST64_M31 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M31; + +typedef union U_INST64_M32 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M32; + +typedef union U_INST64_M33 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M33; + +typedef union U_INST64_M35 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; + +} INST64_M35; + +typedef union U_INST64_M36 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; }; +} INST64_M36; + +typedef union U_INST64_M37 { + IA64_INST inst; + struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3, + i:1, major:4; }; +} INST64_M37; + +typedef union U_INST64_M41 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; +} INST64_M41; + +typedef union U_INST64_M42 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M42; + +typedef union U_INST64_M43 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M43; + +typedef union U_INST64_M44 { + IA64_INST inst; + struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; }; +} INST64_M44; + +typedef union U_INST64_M45 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M45; + +typedef union U_INST64_M46 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6, + x3:3, un1:1, major:4; }; +} INST64_M46; + +typedef union U_INST64_M47 { + IA64_INST inst; + struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; }; +} INST64_M47; + +typedef union U_INST64_M1{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M1; + +typedef union U_INST64_M2{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M2; + +typedef union U_INST64_M3{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M3; + +typedef union U_INST64_M4 { + IA64_INST inst; + struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M4; + +typedef union U_INST64_M5 { + IA64_INST inst; + struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M5; + +typedef union U_INST64_M6 { + IA64_INST inst; + struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M6; + +typedef union U_INST64_M9 { + IA64_INST inst; + struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M9; + +typedef union U_INST64_M10 { + IA64_INST inst; + struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M10; + +typedef union U_INST64_M12 { + IA64_INST inst; + struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M12; + +typedef union U_INST64_M15 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M15; + +typedef union U_INST64 { + IA64_INST inst; + struct { unsigned long :37, major:4; } generic; + INST64_A5 A5; /* used in build_hypercall_bundle only */ + INST64_B4 B4; /* used in build_hypercall_bundle only */ + INST64_B8 B8; /* rfi, bsw.[01] */ + INST64_B9 B9; /* break.b */ + INST64_I19 I19; /* used in build_hypercall_bundle only */ + INST64_I26 I26; /* mov register to ar (I unit) */ + INST64_I27 I27; /* mov immediate to ar (I unit) */ + INST64_I28 I28; /* mov from ar (I unit) */ + INST64_M1 M1; /* ld integer */ + INST64_M2 M2; + INST64_M3 M3; + INST64_M4 M4; /* st integer */ + INST64_M5 M5; + INST64_M6 M6; /* ldfd floating pointer */ + INST64_M9 M9; /* stfd floating pointer */ + INST64_M10 M10; /* stfd floating pointer */ + INST64_M12 M12; /* ldfd pair floating pointer */ + INST64_M15 M15; /* lfetch + imm update */ + INST64_M28 M28; /* purge translation cache entry */ + INST64_M29 M29; /* mov register to ar (M unit) */ + INST64_M30 M30; /* mov immediate to ar (M unit) */ + INST64_M31 M31; /* mov from ar (M unit) */ + INST64_M32 M32; /* mov reg to cr */ + INST64_M33 M33; /* mov from cr */ + INST64_M35 M35; /* mov to psr */ + INST64_M36 M36; /* mov from psr */ + INST64_M37 M37; /* break.m */ + INST64_M41 M41; /* translation cache insert */ + INST64_M42 M42; /* mov to indirect reg/translation reg insert*/ + INST64_M43 M43; /* mov from indirect reg */ + INST64_M44 M44; /* set/reset system mask */ + INST64_M45 M45; /* translation purge */ + INST64_M46 M46; /* translation access (tpa,tak) */ + INST64_M47 M47; /* purge translation entry */ +} INST64; + +#define MASK_41 ((unsigned long)0x1ffffffffff) + +/* Virtual address memory attributes encoding */ +#define VA_MATTR_WB 0x0 +#define VA_MATTR_UC 0x4 +#define VA_MATTR_UCE 0x5 +#define VA_MATTR_WC 0x6 +#define VA_MATTR_NATPAGE 0x7 + +#define PMASK(size) (~((size) - 1)) +#define PSIZE(size) (1UL<<(size)) +#define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits)) +#define PAGEALIGN(va, ps) CLEARLSB(va, ps) +#define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53)) +#define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */ + +#define ARCH_PAGE_SHIFT 12 + +#define INVALID_TI_TAG (1UL << 63) + +#define VTLB_PTE_P_BIT 0 +#define VTLB_PTE_IO_BIT 60 +#define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT) +#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT) + +#define vcpu_quick_region_check(_tr_regions,_ifa) \ + (_tr_regions & (1 << ((unsigned long)_ifa >> 61))) + +#define vcpu_quick_region_set(_tr_regions,_ifa) \ + do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0) + +static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir, + u64 va, u64 rid) +{ + trp->page_flags = pte; + trp->itir = itir; + trp->vadr = va; + trp->rid = rid; +} + +extern u64 kvm_lookup_mpa(u64 gpfn); +extern u64 kvm_gpa_to_mpa(u64 gpa); + +/* Return I/O type if trye */ +#define __gpfn_is_io(gpfn) \ + ({ \ + u64 pte, ret = 0; \ + pte = kvm_lookup_mpa(gpfn); \ + if (!(pte & GPFN_INV_MASK)) \ + ret = pte & GPFN_IO_MASK; \ + ret; \ + }) + +#endif + +#define IA64_NO_FAULT 0 +#define IA64_FAULT 1 + +#define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15) + +#define SW_BAD 0 /* Bad mode transitition */ +#define SW_V2P 1 /* Physical emulatino is activated */ +#define SW_P2V 2 /* Exit physical mode emulation */ +#define SW_SELF 3 /* No mode transition */ +#define SW_NOP 4 /* Mode transition, but without action required */ + +#define GUEST_IN_PHY 0x1 +#define GUEST_PHY_EMUL 0x2 + +#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP)) + +#define VRN_SHIFT 61 +#define VRN_MASK 0xe000000000000000 +#define VRN0 0x0UL +#define VRN1 0x1UL +#define VRN2 0x2UL +#define VRN3 0x3UL +#define VRN4 0x4UL +#define VRN5 0x5UL +#define VRN6 0x6UL +#define VRN7 0x7UL + +#define IRQ_NO_MASKED 0 +#define IRQ_MASKED_BY_VTPR 1 +#define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */ + +#define PTA_BASE_SHIFT 15 + +#define IA64_PSR_VM_BIT 46 +#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT) + +/* Interruption Function State */ +#define IA64_IFS_V_BIT 63 +#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT) + +#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX) +#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX) + +#ifndef __ASSEMBLY__ + +#include <asm/gcc_intrin.h> + +#define is_physical_mode(v) \ + ((v->arch.mode_flags) & GUEST_IN_PHY) + +#define is_virtual_mode(v) \ + (!is_physical_mode(v)) + +#define MODE_IND(psr) \ + (((psr).it << 2) + ((psr).dt << 1) + (psr).rt) + +#define _vmm_raw_spin_lock(x) \ + do { \ + __u32 *ia64_spinlock_ptr = (__u32 *) (x); \ + __u64 ia64_spinlock_val; \ + ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ + if (unlikely(ia64_spinlock_val)) { \ + do { \ + while (*ia64_spinlock_ptr) \ + ia64_barrier(); \ + ia64_spinlock_val = \ + ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ + } while (ia64_spinlock_val); \ + } \ + } while (0) + +#define _vmm_raw_spin_unlock(x) \ + do { barrier(); \ + ((spinlock_t *)x)->raw_lock.lock = 0; } \ +while (0) + +void vmm_spin_lock(spinlock_t *lock); +void vmm_spin_unlock(spinlock_t *lock); +enum { + I_TLB = 1, + D_TLB = 2 +}; + +union kvm_va { + struct { + unsigned long off : 60; /* intra-region offset */ + unsigned long reg : 4; /* region number */ + } f; + unsigned long l; + void *p; +}; + +#define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \ + _v.f.reg = 0; _v.l; }) +#define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \ + _v.f.reg = -1; _v.p; }) + +#define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.rid; }) +#define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.ps; }) +#define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.ve; }) + +enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF }; +enum tlb_miss_type { INSTRUCTION, DATA, REGISTER }; + +#define VCPU(_v, _x) ((_v)->arch.vpd->_x) +#define VMX(_v, _x) ((_v)->arch._x) + +#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i]) +#define VLSAPIC_XTP(_v) VMX(_v, xtp) + +static inline unsigned long itir_ps(unsigned long itir) +{ + return ((itir >> 2) & 0x3f); +} + + +/************************************************************************** + VCPU control register access routines + **************************************************************************/ + +static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, itir)); +} + +static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, itir) = val; +} + +static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, ifa)); +} + +static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ifa) = val; +} + +static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, iva)); +} + +static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, pta)); +} + +static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, lid)); +} + +static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, tpr)); +} + +static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu) +{ + return (0UL); /*reads of eoi always return 0 */ +} + +static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[0])); +} + +static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[1])); +} + +static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[2])); +} + +static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[3])); +} + +static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val) +{ + ia64_setreg(_IA64_REG_CR_DCR, val); +} + +static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, isr) = val; +} + +static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, lid) = val; +} + +static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ipsr) = val; +} + +static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iip) = val; +} + +static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ifs) = val; +} + +static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iipa) = val; +} + +static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iha) = val; +} + + +static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg) +{ + return vcpu->arch.vrr[reg>>61]; +} + +/************************************************************************** + VCPU debug breakpoint register access routines + **************************************************************************/ + +static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + __ia64_set_dbr(reg, val); +} + +static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + ia64_set_ibr(reg, val); +} + +static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg) +{ + return ((u64)__ia64_get_dbr(reg)); +} + +static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg) +{ + return ((u64)ia64_get_ibr(reg)); +} + +/************************************************************************** + VCPU performance monitor register access routines + **************************************************************************/ +static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + /* NOTE: Writes to unimplemented PMC registers are discarded */ + ia64_set_pmc(reg, val); +} + +static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + /* NOTE: Writes to unimplemented PMD registers are discarded */ + ia64_set_pmd(reg, val); +} + +static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg) +{ + /* NOTE: Reads from unimplemented PMC registers return zero */ + return ((u64)ia64_get_pmc(reg)); +} + +static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg) +{ + /* NOTE: Reads from unimplemented PMD registers return zero */ + return ((u64)ia64_get_pmd(reg)); +} + +static inline unsigned long vrrtomrr(unsigned long val) +{ + union ia64_rr rr; + rr.val = val; + rr.rid = (rr.rid << 4) | 0xe; + if (rr.ps > PAGE_SHIFT) + rr.ps = PAGE_SHIFT; + rr.ve = 1; + return rr.val; +} + + +static inline int highest_bits(int *dat) +{ + u32 bits, bitnum; + int i; + + /* loop for all 256 bits */ + for (i = 7; i >= 0 ; i--) { + bits = dat[i]; + if (bits) { + bitnum = fls(bits); + return i * 32 + bitnum - 1; + } + } + return NULL_VECTOR; +} + +/* + * The pending irq is higher than the inservice one. + * + */ +static inline int is_higher_irq(int pending, int inservice) +{ + return ((pending > inservice) + || ((pending != NULL_VECTOR) + && (inservice == NULL_VECTOR))); +} + +static inline int is_higher_class(int pending, int mic) +{ + return ((pending >> 4) > mic); +} + +/* + * Return 0-255 for pending irq. + * NULL_VECTOR: when no pending. + */ +static inline int highest_pending_irq(struct kvm_vcpu *vcpu) +{ + if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR)) + return NMI_VECTOR; + if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR)) + return ExtINT_VECTOR; + + return highest_bits((int *)&VCPU(vcpu, irr[0])); +} + +static inline int highest_inservice_irq(struct kvm_vcpu *vcpu) +{ + if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR)) + return NMI_VECTOR; + if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR)) + return ExtINT_VECTOR; + + return highest_bits((int *)&(VMX(vcpu, insvc[0]))); +} + +extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg, + struct ia64_fpreg *val); +extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg, + struct ia64_fpreg *val); +extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg); +extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat); +extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu); +extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val); +extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr); +extern void vcpu_bsw0(struct kvm_vcpu *vcpu); +extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, + u64 itir, u64 va, int type); +extern struct thash_data *vhpt_lookup(u64 va); +extern u64 guest_vhpt_lookup(u64 iha, u64 *pte); +extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps); +extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps); +extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va); +extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, + u64 itir, u64 ifa, int type); +extern void thash_purge_all(struct kvm_vcpu *v); +extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v, + u64 va, int is_data); +extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, + u64 ps, int is_data); + +extern void vcpu_increment_iip(struct kvm_vcpu *v); +extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu); +extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec); +extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec); +extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr); +extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr); +extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr); +extern void nested_dtlb(struct kvm_vcpu *vcpu); +extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr); +extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref); + +extern void update_vhpi(struct kvm_vcpu *vcpu, int vec); +extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice); + +extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle); +extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma); +extern void vmm_transition(struct kvm_vcpu *vcpu); +extern void vmm_trampoline(union context *from, union context *to); +extern int vmm_entry(void); +extern u64 vcpu_get_itc(struct kvm_vcpu *vcpu); + +extern void vmm_reset_entry(void); +void kvm_init_vtlb(struct kvm_vcpu *v); +void kvm_init_vhpt(struct kvm_vcpu *v); +void thash_init(struct thash_cb *hcb, u64 sz); + +void panic_vm(struct kvm_vcpu *v); + +extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, + u64 arg4, u64 arg5, u64 arg6, u64 arg7); +#endif +#endif /* __VCPU_H__ */ diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c new file mode 100644 index 00000000000..2275bf4e681 --- /dev/null +++ b/arch/ia64/kvm/vmm.c @@ -0,0 +1,66 @@ +/* + * vmm.c: vmm module interface with kvm module + * + * Copyright (c) 2007, Intel Corporation. + * + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + + +#include<linux/module.h> +#include<asm/fpswa.h> + +#include "vcpu.h" + +MODULE_AUTHOR("Intel"); +MODULE_LICENSE("GPL"); + +extern char kvm_ia64_ivt; +extern fpswa_interface_t *vmm_fpswa_interface; + +struct kvm_vmm_info vmm_info = { + .module = THIS_MODULE, + .vmm_entry = vmm_entry, + .tramp_entry = vmm_trampoline, + .vmm_ivt = (unsigned long)&kvm_ia64_ivt, +}; + +static int __init kvm_vmm_init(void) +{ + + vmm_fpswa_interface = fpswa_interface; + + /*Register vmm data to kvm side*/ + return kvm_init(&vmm_info, 1024, THIS_MODULE); +} + +static void __exit kvm_vmm_exit(void) +{ + kvm_exit(); + return ; +} + +void vmm_spin_lock(spinlock_t *lock) +{ + _vmm_raw_spin_lock(lock); +} + +void vmm_spin_unlock(spinlock_t *lock) +{ + _vmm_raw_spin_unlock(lock); +} +module_init(kvm_vmm_init) +module_exit(kvm_vmm_exit) diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S new file mode 100644 index 00000000000..3ee5f481c06 --- /dev/null +++ b/arch/ia64/kvm/vmm_ivt.S @@ -0,0 +1,1424 @@ +/* + * /ia64/kvm_ivt.S + * + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger <davidm@hpl.hp.com> + * Copyright (C) 2000, 2002-2003 Intel Co + * Asit Mallick <asit.k.mallick@intel.com> + * Suresh Siddha <suresh.b.siddha@intel.com> + * Kenneth Chen <kenneth.w.chen@intel.com> + * Fenghua Yu <fenghua.yu@intel.com> + * + * + * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling + * for SMP + * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB + * handler now uses virtual PT. + * + * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Supporting Intel virtualization architecture + * + */ + +/* + * This file defines the interruption vector table used by the CPU. + * It does not include one entry per possible cause of interruption. + * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for + * critical + * interruptions like TLB misses. + * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss + * (12,51) + * entry offset ----/ / / / + * / + * entry number ---------/ / / + * / + * size of the entry -------------/ / + * / + * vector name -------------------------------------/ + * / + * interruptions triggering this vector + * ----------------------/ + * + * The table is 32KB in size and must be aligned on 32KB + * boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.6 (Oct 1999) + */ + + +#include <asm/asmmacro.h> +#include <asm/cache.h> +#include <asm/pgtable.h> + +#include "asm-offsets.h" +#include "vcpu.h" +#include "kvm_minstate.h" +#include "vti.h" + +#if 1 +# define PSR_DEFAULT_BITS psr.ac +#else +# define PSR_DEFAULT_BITS 0 +#endif + + +#define KVM_FAULT(n) \ + kvm_fault_##n:; \ + mov r19=n;; \ + br.sptk.many kvm_fault_##n; \ + ;; \ + + +#define KVM_REFLECT(n) \ + mov r31=pr; \ + mov r19=n; /* prepare to save predicates */ \ + mov r29=cr.ipsr; \ + ;; \ + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ +(p7)br.sptk.many kvm_dispatch_reflection; \ + br.sptk.many kvm_panic; \ + + +GLOBAL_ENTRY(kvm_panic) + br.sptk.many kvm_panic + ;; +END(kvm_panic) + + + + + + .section .text.ivt,"ax" + + .align 32768 // align on 32KB boundary + .global kvm_ia64_ivt +kvm_ia64_ivt: +/////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) +ENTRY(kvm_vhpt_miss) + KVM_FAULT(0) +END(kvm_vhpt_miss) + + + .org kvm_ia64_ivt+0x400 +//////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) +ENTRY(kvm_itlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; + (p6) br.sptk kvm_alt_itlb_miss + mov r19 = 1 + br.sptk kvm_itlb_miss_dispatch + KVM_FAULT(1); +END(kvm_itlb_miss) + + .org kvm_ia64_ivt+0x0800 +////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) +ENTRY(kvm_dtlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6)br.sptk kvm_alt_dtlb_miss + br.sptk kvm_dtlb_miss_dispatch +END(kvm_dtlb_miss) + + .org kvm_ia64_ivt+0x0c00 +//////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) +ENTRY(kvm_alt_itlb_miss) + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + mov r24=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r17,r19 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(kvm_alt_itlb_miss) + + .org kvm_ia64_ivt+0x1000 +///////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) +ENTRY(kvm_alt_dtlb_miss) + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r24=cr.ipsr + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r19,r17 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(kvm_alt_dtlb_miss) + + .org kvm_ia64_ivt+0x1400 +////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) +ENTRY(kvm_nested_dtlb_miss) + KVM_FAULT(5) +END(kvm_nested_dtlb_miss) + + .org kvm_ia64_ivt+0x1800 +///////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) +ENTRY(kvm_ikey_miss) + KVM_REFLECT(6) +END(kvm_ikey_miss) + + .org kvm_ia64_ivt+0x1c00 +///////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) +ENTRY(kvm_dkey_miss) + KVM_REFLECT(7) +END(kvm_dkey_miss) + + .org kvm_ia64_ivt+0x2000 +//////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) +ENTRY(kvm_dirty_bit) + KVM_REFLECT(8) +END(kvm_dirty_bit) + + .org kvm_ia64_ivt+0x2400 +//////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) +ENTRY(kvm_iaccess_bit) + KVM_REFLECT(9) +END(kvm_iaccess_bit) + + .org kvm_ia64_ivt+0x2800 +/////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) +ENTRY(kvm_daccess_bit) + KVM_REFLECT(10) +END(kvm_daccess_bit) + + .org kvm_ia64_ivt+0x2c00 +///////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) +ENTRY(kvm_break_fault) + mov r31=pr + mov r19=11 + mov r29=cr.ipsr + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) + mov out0=cr.ifa + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.iim // FIXME: pity to make this slow access twice + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15)ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out1=16,sp + br.call.sptk.many b6=kvm_ia64_handle_break + ;; +END(kvm_break_fault) + + .org kvm_ia64_ivt+0x3000 +///////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) +ENTRY(kvm_interrupt) + mov r31=pr // prepare to save predicates + mov r19=12 + mov r29=cr.ipsr + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT + tbit.z p0,p15=r29,IA64_PSR_I_BIT + ;; +(p7) br.sptk kvm_dispatch_interrupt + ;; + mov r27=ar.rsc /* M */ + mov r20=r1 /* A */ + mov r25=ar.unat /* M */ + mov r26=ar.pfs /* I */ + mov r28=cr.iip /* M */ + cover /* B (or nothing) */ + ;; + mov r1=sp + ;; + invala /* M */ + mov r30=cr.ifs + ;; + addl r1=-VMM_PT_REGS_SIZE,r1 + ;; + adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ + adds r16=PT(CR_IPSR),r1 + ;; + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES + st8 [r16]=r29 /* save cr.ipsr */ + ;; + lfetch.fault.excl.nt1 [r17] + mov r29=b0 + ;; + adds r16=PT(R8),r1 /* initialize first base pointer */ + adds r17=PT(R9),r1 /* initialize second base pointer */ + mov r18=r0 /* make sure r18 isn't NaT */ + ;; +.mem.offset 0,0; st8.spill [r16]=r8,16 +.mem.offset 8,0; st8.spill [r17]=r9,16 + ;; +.mem.offset 0,0; st8.spill [r16]=r10,24 +.mem.offset 8,0; st8.spill [r17]=r11,24 + ;; + st8 [r16]=r28,16 /* save cr.iip */ + st8 [r17]=r30,16 /* save cr.ifs */ + mov r8=ar.fpsr /* M */ + mov r9=ar.csd + mov r10=ar.ssd + movl r11=FPSR_DEFAULT /* L-unit */ + ;; + st8 [r16]=r25,16 /* save ar.unat */ + st8 [r17]=r26,16 /* save ar.pfs */ + shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ + ;; + st8 [r16]=r27,16 /* save ar.rsc */ + adds r17=16,r17 /* skip over ar_rnat field */ + ;; + st8 [r17]=r31,16 /* save predicates */ + adds r16=16,r16 /* skip over ar_bspstore field */ + ;; + st8 [r16]=r29,16 /* save b0 */ + st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ + ;; +.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ +.mem.offset 8,0; st8.spill [r17]=r12,16 + adds r12=-16,r1 + /* switch to kernel memory stack (with 16 bytes of scratch) */ + ;; +.mem.offset 0,0; st8.spill [r16]=r13,16 +.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ + ;; +.mem.offset 0,0; st8.spill [r16]=r15,16 +.mem.offset 8,0; st8.spill [r17]=r14,16 + dep r14=-1,r0,60,4 + ;; +.mem.offset 0,0; st8.spill [r16]=r2,16 +.mem.offset 8,0; st8.spill [r17]=r3,16 + adds r2=VMM_PT_REGS_R16_OFFSET,r1 + adds r14 = VMM_VCPU_GP_OFFSET,r13 + ;; + mov r8=ar.ccv + ld8 r14 = [r14] + ;; + mov r1=r14 /* establish kernel global pointer */ + ;; \ + bsw.1 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + mov out0=r13 + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; +.mem.offset 0,0; st8.spill [r2]=r16,16 +.mem.offset 8,0; st8.spill [r3]=r17,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r18,16 +.mem.offset 8,0; st8.spill [r3]=r19,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r20,16 +.mem.offset 8,0; st8.spill [r3]=r21,16 + mov r18=b6 + ;; +.mem.offset 0,0; st8.spill [r2]=r22,16 +.mem.offset 8,0; st8.spill [r3]=r23,16 + mov r19=b7 + ;; +.mem.offset 0,0; st8.spill [r2]=r24,16 +.mem.offset 8,0; st8.spill [r3]=r25,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r26,16 +.mem.offset 8,0; st8.spill [r3]=r27,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r28,16 +.mem.offset 8,0; st8.spill [r3]=r29,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r30,16 +.mem.offset 8,0; st8.spill [r3]=r31,32 + ;; + mov ar.fpsr=r11 /* M-unit */ + st8 [r2]=r8,8 /* ar.ccv */ + adds r24=PT(B6)-PT(F7),r3 + ;; + stf.spill [r2]=f6,32 + stf.spill [r3]=f7,32 + ;; + stf.spill [r2]=f8,32 + stf.spill [r3]=f9,32 + ;; + stf.spill [r2]=f10 + stf.spill [r3]=f11 + adds r25=PT(B7)-PT(F11),r3 + ;; + st8 [r24]=r18,16 /* b6 */ + st8 [r25]=r19,16 /* b7 */ + ;; + st8 [r24]=r9 /* ar.csd */ + st8 [r25]=r10 /* ar.ssd */ + ;; + srlz.d // make sure we see the effect of cr.ivr + addl r14=@gprel(ia64_leave_nested),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_ia64_handle_irq + ;; +END(kvm_interrupt) + + .global kvm_dispatch_vexirq + .org kvm_ia64_ivt+0x3400 +////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved +ENTRY(kvm_virtual_exirq) + mov r31=pr + mov r19=13 + mov r30 =r0 + ;; +kvm_dispatch_vexirq: + cmp.eq p6,p0 = 1,r30 + ;; +(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; +(p6)ld8 r1 = [r29] + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,1,0 + mov out0=r13 + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + KVM_SAVE_REST + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_vexirq +END(kvm_virtual_exirq) + + .org kvm_ia64_ivt+0x3800 +///////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + KVM_FAULT(14) + // this code segment is from 2.6.16.13 + + + .org kvm_ia64_ivt+0x3c00 +/////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) Reserved + KVM_FAULT(15) + + + .org kvm_ia64_ivt+0x4000 +/////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + KVM_FAULT(16) + + .org kvm_ia64_ivt+0x4400 +////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + KVM_FAULT(17) + + .org kvm_ia64_ivt+0x4800 +////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + KVM_FAULT(18) + + .org kvm_ia64_ivt+0x4c00 +////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + KVM_FAULT(19) + + .org kvm_ia64_ivt+0x5000 +////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present +ENTRY(kvm_page_not_present) + KVM_REFLECT(20) +END(kvm_page_not_present) + + .org kvm_ia64_ivt+0x5100 +/////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission vector +ENTRY(kvm_key_permission) + KVM_REFLECT(21) +END(kvm_key_permission) + + .org kvm_ia64_ivt+0x5200 +////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) +ENTRY(kvm_iaccess_rights) + KVM_REFLECT(22) +END(kvm_iaccess_rights) + + .org kvm_ia64_ivt+0x5300 +////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) +ENTRY(kvm_daccess_rights) + KVM_REFLECT(23) +END(kvm_daccess_rights) + + .org kvm_ia64_ivt+0x5400 +///////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) +ENTRY(kvm_general_exception) + KVM_REFLECT(24) + KVM_FAULT(24) +END(kvm_general_exception) + + .org kvm_ia64_ivt+0x5500 +////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) +ENTRY(kvm_disabled_fp_reg) + KVM_REFLECT(25) +END(kvm_disabled_fp_reg) + + .org kvm_ia64_ivt+0x5600 +//////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) +ENTRY(kvm_nat_consumption) + KVM_REFLECT(26) +END(kvm_nat_consumption) + + .org kvm_ia64_ivt+0x5700 +///////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) +ENTRY(kvm_speculation_vector) + KVM_REFLECT(27) +END(kvm_speculation_vector) + + .org kvm_ia64_ivt+0x5800 +///////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + KVM_FAULT(28) + + .org kvm_ia64_ivt+0x5900 +/////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) +ENTRY(kvm_debug_vector) + KVM_FAULT(29) +END(kvm_debug_vector) + + .org kvm_ia64_ivt+0x5a00 +/////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) +ENTRY(kvm_unaligned_access) + KVM_REFLECT(30) +END(kvm_unaligned_access) + + .org kvm_ia64_ivt+0x5b00 +////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) +ENTRY(kvm_unsupported_data_reference) + KVM_REFLECT(31) +END(kvm_unsupported_data_reference) + + .org kvm_ia64_ivt+0x5c00 +//////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) +ENTRY(kvm_floating_point_fault) + KVM_REFLECT(32) +END(kvm_floating_point_fault) + + .org kvm_ia64_ivt+0x5d00 +///////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) +ENTRY(kvm_floating_point_trap) + KVM_REFLECT(33) +END(kvm_floating_point_trap) + + .org kvm_ia64_ivt+0x5e00 +////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) +ENTRY(kvm_lower_privilege_trap) + KVM_REFLECT(34) +END(kvm_lower_privilege_trap) + + .org kvm_ia64_ivt+0x5f00 +////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) +ENTRY(kvm_taken_branch_trap) + KVM_REFLECT(35) +END(kvm_taken_branch_trap) + + .org kvm_ia64_ivt+0x6000 +//////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) +ENTRY(kvm_single_step_trap) + KVM_REFLECT(36) +END(kvm_single_step_trap) + .global kvm_virtualization_fault_back + .org kvm_ia64_ivt+0x6100 +///////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault +ENTRY(kvm_virtualization_fault) + mov r31=pr + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + st8 [r16] = r1 + adds r17 = VMM_VCPU_GP_OFFSET, r21 + ;; + ld8 r1 = [r17] + cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 + cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 + cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 + cmp.eq p9,p0=EVENT_RSM,r24 + cmp.eq p10,p0=EVENT_SSM,r24 + cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 + cmp.eq p12,p0=EVENT_THASH,r24 + (p6) br.dptk.many kvm_asm_mov_from_ar + (p7) br.dptk.many kvm_asm_mov_from_rr + (p8) br.dptk.many kvm_asm_mov_to_rr + (p9) br.dptk.many kvm_asm_rsm + (p10) br.dptk.many kvm_asm_ssm + (p11) br.dptk.many kvm_asm_mov_to_psr + (p12) br.dptk.many kvm_asm_thash + ;; +kvm_virtualization_fault_back: + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + ld8 r1 = [r16] + ;; + mov r19=37 + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + cmp.ne p6,p0=EVENT_RFI, r24 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] + ;; + adds r18=VMM_VPD_VIFS_OFFSET,r18 + ;; + ld8 r18=[r18] + ;; + tbit.z p6,p0=r18,63 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + //if vifs.v=1 desert current register frame + alloc r18=ar.pfs,0,0,0,0 + br.sptk kvm_dispatch_virtualization_fault +END(kvm_virtualization_fault) + + .org kvm_ia64_ivt+0x6200 +////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + KVM_FAULT(38) + + .org kvm_ia64_ivt+0x6300 +///////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 bundles) Reserved + KVM_FAULT(39) + + .org kvm_ia64_ivt+0x6400 +///////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + KVM_FAULT(40) + + .org kvm_ia64_ivt+0x6500 +////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + KVM_FAULT(41) + + .org kvm_ia64_ivt+0x6600 +////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + KVM_FAULT(42) + + .org kvm_ia64_ivt+0x6700 +////////////////////////////////////////////////////////////////// +// 0x6700 Entry 43 (size 16 bundles) Reserved + KVM_FAULT(43) + + .org kvm_ia64_ivt+0x6800 +////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + KVM_FAULT(44) + + .org kvm_ia64_ivt+0x6900 +/////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception +//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) +ENTRY(kvm_ia32_exception) + KVM_FAULT(45) +END(kvm_ia32_exception) + + .org kvm_ia64_ivt+0x6a00 +//////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) +ENTRY(kvm_ia32_intercept) + KVM_FAULT(47) +END(kvm_ia32_intercept) + + .org kvm_ia64_ivt+0x6c00 +///////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + KVM_FAULT(48) + + .org kvm_ia64_ivt+0x6d00 +////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + KVM_FAULT(49) + + .org kvm_ia64_ivt+0x6e00 +////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + KVM_FAULT(50) + + .org kvm_ia64_ivt+0x6f00 +///////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + KVM_FAULT(52) + + .org kvm_ia64_ivt+0x7100 +//////////////////////////////////////////////////////////////////// +// 0x7100 Entry 53 (size 16 bundles) Reserved + KVM_FAULT(53) + + .org kvm_ia64_ivt+0x7200 +///////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + KVM_FAULT(54) + + .org kvm_ia64_ivt+0x7300 +//////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + KVM_FAULT(55) + + .org kvm_ia64_ivt+0x7400 +//////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + KVM_FAULT(56) + + .org kvm_ia64_ivt+0x7500 +///////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + KVM_FAULT(57) + + .org kvm_ia64_ivt+0x7600 +///////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + KVM_FAULT(58) + + .org kvm_ia64_ivt+0x7700 +//////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + KVM_FAULT(59) + + .org kvm_ia64_ivt+0x7800 +//////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + KVM_FAULT(60) + + .org kvm_ia64_ivt+0x7900 +///////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + KVM_FAULT(61) + + .org kvm_ia64_ivt+0x7a00 +///////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + KVM_FAULT(62) + + .org kvm_ia64_ivt+0x7b00 +///////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + KVM_FAULT(63) + + .org kvm_ia64_ivt+0x7c00 +//////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + KVM_FAULT(64) + + .org kvm_ia64_ivt+0x7d00 +///////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + KVM_FAULT(65) + + .org kvm_ia64_ivt+0x7e00 +///////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + KVM_FAULT(66) + + .org kvm_ia64_ivt+0x7f00 +//////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + KVM_FAULT(67) + + .org kvm_ia64_ivt+0x8000 +// There is no particular reason for this code to be here, other than that +// there happens to be space here that would go unused otherwise. If this +// fault ever gets "unreserved", simply moved the following code to a more +// suitable spot... + + +ENTRY(kvm_dtlb_miss_dispatch) + mov r19 = 2 + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault +END(kvm_dtlb_miss_dispatch) + +ENTRY(kvm_itlb_miss_dispatch) + + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault +END(kvm_itlb_miss_dispatch) + +ENTRY(kvm_dispatch_reflection) + /* + * Input: + * psr.ic: off + * r19: intr type (offset into ivt, see ia64_int.h) + * r31: contains saved predicates (pr) + */ + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + mov out0=cr.ifa + mov out1=cr.isr + mov out2=cr.iim + mov out3=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out4=16,r12 + br.call.sptk.many b6=reflect_interruption +END(kvm_dispatch_reflection) + +ENTRY(kvm_dispatch_virtualization_fault) + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + mov out0=r13 //vcpu + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out1=16,sp //regs + br.call.sptk.many b6=kvm_emulate +END(kvm_dispatch_virtualization_fault) + + +ENTRY(kvm_dispatch_interrupt) + KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + //mov out0=cr.ivr // pass cr.ivr as first arg + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + mov out0=r13 // pass pointer to pt_regs as second arg + br.call.sptk.many b6=kvm_ia64_handle_irq +END(kvm_dispatch_interrupt) + + + + +GLOBAL_ENTRY(ia64_leave_nested) + rsm psr.i + ;; + adds r21=PT(PR)+16,r12 + ;; + lfetch [r21],PT(CR_IPSR)-PT(PR) + adds r2=PT(B6)+16,r12 + adds r3=PT(R16)+16,r12 + ;; + lfetch [r21] + ld8 r28=[r2],8 // load b6 + adds r29=PT(R24)+16,r12 + + ld8.fill r16=[r3] + adds r3=PT(AR_CSD)-PT(R16),r3 + adds r30=PT(AR_CCV)+16,r12 + ;; + ld8.fill r24=[r29] + ld8 r15=[r30] // load ar.ccv + ;; + ld8 r29=[r2],16 // load b7 + ld8 r30=[r3],16 // load ar.csd + ;; + ld8 r31=[r2],16 // load ar.ssd + ld8.fill r8=[r3],16 + ;; + ld8.fill r9=[r2],16 + ld8.fill r10=[r3],PT(R17)-PT(R10) + ;; + ld8.fill r11=[r2],PT(R18)-PT(R11) + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + mov ar.csd=r30 + mov ar.ssd=r31 + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i + ;; + ld8.fill r22=[r2],24 + ld8.fill r23=[r3],24 + mov b6=r28 + ;; + ld8.fill r25=[r2],16 + ld8.fill r26=[r3],16 + mov b7=r29 + ;; + ld8.fill r27=[r2],16 + ld8.fill r28=[r3],16 + ;; + ld8.fill r29=[r2],16 + ld8.fill r30=[r3],24 + ;; + ld8.fill r31=[r2],PT(F9)-PT(R31) + adds r3=PT(F10)-PT(F6),r3 + ;; + ldf.fill f9=[r2],PT(F6)-PT(F9) + ldf.fill f10=[r3],PT(F8)-PT(F10) + ;; + ldf.fill f6=[r2],PT(F7)-PT(F6) + ;; + ldf.fill f7=[r2],PT(F11)-PT(F7) + ldf.fill f8=[r3],32 + ;; + srlz.i // ensure interruption collection is off + mov ar.ccv=r15 + ;; + bsw.0 // switch back to bank 0 (no stop bit required beforehand...) + ;; + ldf.fill f11=[r2] +// mov r18=r13 +// mov r21=r13 + adds r16=PT(CR_IPSR)+16,r12 + adds r17=PT(CR_IIP)+16,r12 + ;; + ld8 r29=[r16],16 // load cr.ipsr + ld8 r28=[r17],16 // load cr.iip + ;; + ld8 r30=[r16],16 // load cr.ifs + ld8 r25=[r17],16 // load ar.unat + ;; + ld8 r26=[r16],16 // load ar.pfs + ld8 r27=[r17],16 // load ar.rsc + cmp.eq p9,p0=r0,r0 + // set p9 to indicate that we should restore cr.ifs + ;; + ld8 r24=[r16],16 // load ar.rnat (may be garbage) + ld8 r23=[r17],16// load ar.bspstore (may be garbage) + ;; + ld8 r31=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r19=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 + ;; + ld8 r20=[r16],16 // ar.fpsr + ld8.fill r15=[r17],16 + ;; + ld8.fill r14=[r16],16 + ld8.fill r2=[r17] + ;; + ld8.fill r3=[r16] + ;; + mov r16=ar.bsp // get existing backing store pointer + ;; + mov b0=r22 + mov ar.pfs=r26 + mov cr.ifs=r30 + mov cr.ipsr=r29 + mov ar.fpsr=r20 + mov cr.iip=r28 + ;; + mov ar.rsc=r27 + mov ar.unat=r25 + mov pr=r31,-1 + rfi +END(ia64_leave_nested) + + + +GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) + /* + * work.need_resched etc. mustn't get changed + *by this CPU before it returns to + ;; + * user- or fsys-mode, hence we disable interrupts early on: + */ + adds r2 = PT(R4)+16,r12 + adds r3 = PT(R5)+16,r12 + adds r8 = PT(EML_UNAT)+16,r12 + ;; + ld8 r8 = [r8] + ;; + mov ar.unat=r8 + ;; + ld8.fill r4=[r2],16 //load r4 + ld8.fill r5=[r3],16 //load r5 + ;; + ld8.fill r6=[r2] //load r6 + ld8.fill r7=[r3] //load r7 + ;; +END(ia64_leave_hypervisor_prepare) +//fall through +GLOBAL_ENTRY(ia64_leave_hypervisor) + rsm psr.i + ;; + br.call.sptk.many b0=leave_hypervisor_tail + ;; + adds r20=PT(PR)+16,r12 + adds r8=PT(EML_UNAT)+16,r12 + ;; + ld8 r8=[r8] + ;; + mov ar.unat=r8 + ;; + lfetch [r20],PT(CR_IPSR)-PT(PR) + adds r2 = PT(B6)+16,r12 + adds r3 = PT(B7)+16,r12 + ;; + lfetch [r20] + ;; + ld8 r24=[r2],16 /* B6 */ + ld8 r25=[r3],16 /* B7 */ + ;; + ld8 r26=[r2],16 /* ar_csd */ + ld8 r27=[r3],16 /* ar_ssd */ + mov b6 = r24 + ;; + ld8.fill r8=[r2],16 + ld8.fill r9=[r3],16 + mov b7 = r25 + ;; + mov ar.csd = r26 + mov ar.ssd = r27 + ;; + ld8.fill r10=[r2],PT(R15)-PT(R10) + ld8.fill r11=[r3],PT(R14)-PT(R11) + ;; + ld8.fill r15=[r2],PT(R16)-PT(R15) + ld8.fill r14=[r3],PT(R17)-PT(R14) + ;; + ld8.fill r16=[r2],16 + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + ;; + ld8.fill r22=[r2],16 + ld8.fill r23=[r3],16 + ;; + ld8.fill r24=[r2],16 + ld8.fill r25=[r3],16 + ;; + ld8.fill r26=[r2],16 + ld8.fill r27=[r3],16 + ;; + ld8.fill r28=[r2],16 + ld8.fill r29=[r3],16 + ;; + ld8.fill r30=[r2],PT(F6)-PT(R30) + ld8.fill r31=[r3],PT(F7)-PT(R31) + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i // ensure interruption collection is off + ;; + bsw.0 + ;; + adds r16 = PT(CR_IPSR)+16,r12 + adds r17 = PT(CR_IIP)+16,r12 + mov r21=r13 // get current + ;; + ld8 r31=[r16],16 // load cr.ipsr + ld8 r30=[r17],16 // load cr.iip + ;; + ld8 r29=[r16],16 // load cr.ifs + ld8 r28=[r17],16 // load ar.unat + ;; + ld8 r27=[r16],16 // load ar.pfs + ld8 r26=[r17],16 // load ar.rsc + ;; + ld8 r25=[r16],16 // load ar.rnat + ld8 r24=[r17],16 // load ar.bspstore + ;; + ld8 r23=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r20=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 //load r1 + ;; + ld8.fill r12=[r16],16 //load r12 + ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 + ;; + ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr + ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 + ;; + ld8.fill r3=[r16] //load r3 + ld8 r18=[r17] //load ar_ccv + ;; + mov ar.fpsr=r19 + mov ar.ccv=r18 + shr.u r18=r20,16 + ;; +kvm_rbs_switch: + mov r19=96 + +kvm_dont_preserve_current_frame: +/* + * To prevent leaking bits between the hypervisor and guest domain, + * we must clear the stacked registers in the "invalid" partition here. + * 5 registers/cycle on McKinley). + */ +# define pRecurse p6 +# define pReturn p7 +# define Nregs 14 + + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r19 + mov in1=0 + ;; + TEXT_ALIGN(32) +kvm_rse_clear_invalid: + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 + // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 +(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 + // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 +(pReturn) br.ret.dptk.many b0 + +# undef pRecurse +# undef pReturn + +// loadrs has already been shifted + alloc r16=ar.pfs,0,0,0,0 // drop current register frame + ;; + loadrs + ;; + mov ar.bspstore=r24 + ;; + mov ar.unat=r28 + mov ar.rnat=r25 + mov ar.rsc=r26 + ;; + mov cr.ipsr=r31 + mov cr.iip=r30 + mov cr.ifs=r29 + mov ar.pfs=r27 + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] //vpd + adds r17=VMM_VCPU_ISR_OFFSET,r21 + ;; + ld8 r17=[r17] + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] //vpsr + adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21 + ;; + ld8 r20=[r20] + ;; +//vsa_sync_write_start + mov r25=r18 + adds r16= VMM_VCPU_GP_OFFSET,r21 + ;; + ld8 r16= [r16] // Put gp in r24 + movl r24=@gprel(ia64_vmm_entry) // calculate return address + ;; + add r24=r24,r16 + ;; + add r16=PAL_VPS_SYNC_WRITE,r20 + ;; + mov b0=r16 + br.cond.sptk b0 // call the service + ;; +END(ia64_leave_hypervisor) +// fall through +GLOBAL_ENTRY(ia64_vmm_entry) +/* + * must be at bank 0 + * parameter: + * r17:cr.isr + * r18:vpd + * r19:vpsr + * r20:__vsa_base + * r22:b0 + * r23:predicate + */ + mov r24=r22 + mov r25=r18 + tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + ;; + (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 + (p1) br.sptk.many ia64_vmm_entry_out + ;; + tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir + ;; + (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 + (p2) add r29=PAL_VPS_RESUME_HANDLER,r20 + (p2) ld8 r26=[r25] + ;; +ia64_vmm_entry_out: + mov pr=r23,-2 + mov b0=r29 + ;; + br.cond.sptk b0 // call pal service +END(ia64_vmm_entry) + + + +/* + * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, + * u64 arg3, u64 arg4, u64 arg5, + * u64 arg6, u64 arg7); + * + * XXX: The currently defined services use only 4 args at the max. The + * rest are not consumed. + */ +GLOBAL_ENTRY(ia64_call_vsa) + .regstk 4,4,0,0 + +rpsave = loc0 +pfssave = loc1 +psrsave = loc2 +entry = loc3 +hostret = r24 + + alloc pfssave=ar.pfs,4,4,0,0 + mov rpsave=rp + adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 + ;; + ld8 entry=[entry] +1: mov hostret=ip + mov r25=in1 // copy arguments + mov r26=in2 + mov r27=in3 + mov psrsave=psr + ;; + tbit.nz p6,p0=psrsave,14 // IA64_PSR_I + tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC + ;; + add hostret=2f-1b,hostret // calculate return address + add entry=entry,in0 + ;; + rsm psr.i | psr.ic + ;; + srlz.i + mov b6=entry + br.cond.sptk b6 // call the service +2: + // Architectural sequence for enabling interrupts if necessary +(p7) ssm psr.ic + ;; +(p7) srlz.i + ;; +//(p6) ssm psr.i + ;; + mov rp=rpsave + mov ar.pfs=pfssave + mov r8=r31 + ;; + srlz.d + br.ret.sptk rp + +END(ia64_call_vsa) + +#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) + +GLOBAL_ENTRY(vmm_reset_entry) + //set up ipsr, iip, vpd.vpsr, dcr + // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 + // For DCR: all bits 0 + adds r14=-VMM_PT_REGS_SIZE, r12 + ;; + movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 + movl r10=0x8000000000000000 + adds r16=PT(CR_IIP), r14 + adds r20=PT(R1), r14 + ;; + rsm psr.ic | psr.i + ;; + srlz.i + ;; + bsw.0 + ;; + mov r21 =r13 + ;; + bsw.1 + ;; + mov ar.rsc = 0 + ;; + flushrs + ;; + mov ar.bspstore = 0 + // clear BSPSTORE + ;; + mov cr.ipsr=r6 + mov cr.ifs=r10 + ld8 r4 = [r16] // Set init iip for first run. + ld8 r1 = [r20] + ;; + mov cr.iip=r4 + ;; + adds r16=VMM_VPD_BASE_OFFSET,r13 + adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13 + ;; + ld8 r18=[r16] + ld8 r20=[r20] + ;; + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] + mov r17=r0 + mov r22=r0 + mov r23=r0 + br.cond.sptk ia64_vmm_entry + br.ret.sptk b0 +END(vmm_reset_entry) diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h new file mode 100644 index 00000000000..f6c5617e16a --- /dev/null +++ b/arch/ia64/kvm/vti.h @@ -0,0 +1,290 @@ +/* + * vti.h: prototype for generial vt related interface + * Copyright (c) 2004, Intel Corporation. + * + * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Fred Yang (fred.yang@intel.com) + * Kun Tian (Kevin Tian) (kevin.tian@intel.com) + * + * Copyright (c) 2007, Intel Corporation. + * Zhang xiantao <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ +#ifndef _KVM_VT_I_H +#define _KVM_VT_I_H + +#ifndef __ASSEMBLY__ +#include <asm/page.h> + +#include <linux/kvm_host.h> + +/* define itr.i and itr.d in ia64_itr function */ +#define ITR 0x01 +#define DTR 0x02 +#define IaDTR 0x03 + +#define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/ +#define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/ + +#define RR6 (6UL<<61) +#define RR7 (7UL<<61) + + +/* config_options in pal_vp_init_env */ +#define VP_INITIALIZE 1UL +#define VP_FR_PMC 1UL<<1 +#define VP_OPCODE 1UL<<8 +#define VP_CAUSE 1UL<<9 +#define VP_FW_ACC 1UL<<63 + +/* init vp env with initializing vm_buffer */ +#define VP_INIT_ENV_INITALIZE (VP_INITIALIZE | VP_FR_PMC |\ + VP_OPCODE | VP_CAUSE | VP_FW_ACC) +/* init vp env without initializing vm_buffer */ +#define VP_INIT_ENV VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC + +#define PAL_VP_CREATE 265 +/* Stacked Virt. Initializes a new VPD for the operation of + * a new virtual processor in the virtual environment. + */ +#define PAL_VP_ENV_INFO 266 +/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/ +#define PAL_VP_EXIT_ENV 267 +/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/ +#define PAL_VP_INIT_ENV 268 +/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/ +#define PAL_VP_REGISTER 269 +/*Stacked Virt. Register a different host IVT for the virtual processor.*/ +#define PAL_VP_RESUME 270 +/* Renamed from PAL_VP_RESUME */ +#define PAL_VP_RESTORE 270 +/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/ +#define PAL_VP_SUSPEND 271 +/* Renamed from PAL_VP_SUSPEND */ +#define PAL_VP_SAVE 271 +/* Stacked Virt. Suspends operation for the specified virtual processor on + * the logical processor. + */ +#define PAL_VP_TERMINATE 272 +/* Stacked Virt. Terminates operation for the specified virtual processor.*/ + +union vac { + unsigned long value; + struct { + int a_int:1; + int a_from_int_cr:1; + int a_to_int_cr:1; + int a_from_psr:1; + int a_from_cpuid:1; + int a_cover:1; + int a_bsw:1; + long reserved:57; + }; +}; + +union vdc { + unsigned long value; + struct { + int d_vmsw:1; + int d_extint:1; + int d_ibr_dbr:1; + int d_pmc:1; + int d_to_pmd:1; + int d_itm:1; + long reserved:58; + }; +}; + +struct vpd { + union vac vac; + union vdc vdc; + unsigned long virt_env_vaddr; + unsigned long reserved1[29]; + unsigned long vhpi; + unsigned long reserved2[95]; + unsigned long vgr[16]; + unsigned long vbgr[16]; + unsigned long vnat; + unsigned long vbnat; + unsigned long vcpuid[5]; + unsigned long reserved3[11]; + unsigned long vpsr; + unsigned long vpr; + unsigned long reserved4[76]; + union { + unsigned long vcr[128]; + struct { + unsigned long dcr; + unsigned long itm; + unsigned long iva; + unsigned long rsv1[5]; + unsigned long pta; + unsigned long rsv2[7]; + unsigned long ipsr; + unsigned long isr; + unsigned long rsv3; + unsigned long iip; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long ifs; + unsigned long iim; + unsigned long iha; + unsigned long rsv4[38]; + unsigned long lid; + unsigned long ivr; + unsigned long tpr; + unsigned long eoi; + unsigned long irr[4]; + unsigned long itv; + unsigned long pmv; + unsigned long cmcv; + unsigned long rsv5[5]; + unsigned long lrr0; + unsigned long lrr1; + unsigned long rsv6[46]; + }; + }; + unsigned long reserved5[128]; + unsigned long reserved6[3456]; + unsigned long vmm_avail[128]; + unsigned long reserved7[4096]; +}; + +#define PAL_PROC_VM_BIT (1UL << 40) +#define PAL_PROC_VMSW_BIT (1UL << 54) + +static inline s64 ia64_pal_vp_env_info(u64 *buffer_size, + u64 *vp_env_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0); + *buffer_size = iprv.v0; + *vp_env_info = iprv.v1; + return iprv.status; +} + +static inline s64 ia64_pal_vp_exit_env(u64 iva) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0); + return iprv.status; +} + +static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr, + u64 vbase_addr, u64 *vsa_base) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr, + vbase_addr); + *vsa_base = iprv.v0; + + return iprv.status; +} + +static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0); + + return iprv.status; +} + +static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0); + + return iprv.status; +} + +#endif + +/*VPD field offset*/ +#define VPD_VAC_START_OFFSET 0 +#define VPD_VDC_START_OFFSET 8 +#define VPD_VHPI_START_OFFSET 256 +#define VPD_VGR_START_OFFSET 1024 +#define VPD_VBGR_START_OFFSET 1152 +#define VPD_VNAT_START_OFFSET 1280 +#define VPD_VBNAT_START_OFFSET 1288 +#define VPD_VCPUID_START_OFFSET 1296 +#define VPD_VPSR_START_OFFSET 1424 +#define VPD_VPR_START_OFFSET 1432 +#define VPD_VRSE_CFLE_START_OFFSET 1440 +#define VPD_VCR_START_OFFSET 2048 +#define VPD_VTPR_START_OFFSET 2576 +#define VPD_VRR_START_OFFSET 3072 +#define VPD_VMM_VAIL_START_OFFSET 31744 + +/*Virtualization faults*/ + +#define EVENT_MOV_TO_AR 1 +#define EVENT_MOV_TO_AR_IMM 2 +#define EVENT_MOV_FROM_AR 3 +#define EVENT_MOV_TO_CR 4 +#define EVENT_MOV_FROM_CR 5 +#define EVENT_MOV_TO_PSR 6 +#define EVENT_MOV_FROM_PSR 7 +#define EVENT_ITC_D 8 +#define EVENT_ITC_I 9 +#define EVENT_MOV_TO_RR 10 +#define EVENT_MOV_TO_DBR 11 +#define EVENT_MOV_TO_IBR 12 +#define EVENT_MOV_TO_PKR 13 +#define EVENT_MOV_TO_PMC 14 +#define EVENT_MOV_TO_PMD 15 +#define EVENT_ITR_D 16 +#define EVENT_ITR_I 17 +#define EVENT_MOV_FROM_RR 18 +#define EVENT_MOV_FROM_DBR 19 +#define EVENT_MOV_FROM_IBR 20 +#define EVENT_MOV_FROM_PKR 21 +#define EVENT_MOV_FROM_PMC 22 +#define EVENT_MOV_FROM_CPUID 23 +#define EVENT_SSM 24 +#define EVENT_RSM 25 +#define EVENT_PTC_L 26 +#define EVENT_PTC_G 27 +#define EVENT_PTC_GA 28 +#define EVENT_PTR_D 29 +#define EVENT_PTR_I 30 +#define EVENT_THASH 31 +#define EVENT_TTAG 32 +#define EVENT_TPA 33 +#define EVENT_TAK 34 +#define EVENT_PTC_E 35 +#define EVENT_COVER 36 +#define EVENT_RFI 37 +#define EVENT_BSW_0 38 +#define EVENT_BSW_1 39 +#define EVENT_VMSW 40 + +/**PAL virtual services offsets */ +#define PAL_VPS_RESUME_NORMAL 0x0000 +#define PAL_VPS_RESUME_HANDLER 0x0400 +#define PAL_VPS_SYNC_READ 0x0800 +#define PAL_VPS_SYNC_WRITE 0x0c00 +#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000 +#define PAL_VPS_THASH 0x1400 +#define PAL_VPS_TTAG 0x1800 +#define PAL_VPS_RESTORE 0x1c00 +#define PAL_VPS_SAVE 0x2000 + +#endif/* _VT_I_H*/ diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c new file mode 100644 index 00000000000..def4576d22b --- /dev/null +++ b/arch/ia64/kvm/vtlb.c @@ -0,0 +1,636 @@ +/* + * vtlb.c: guest virtual tlb handling module. + * Copyright (c) 2004, Intel Corporation. + * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com> + * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> + * + * Copyright (c) 2007, Intel Corporation. + * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> + * Xiantao Zhang <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include "vcpu.h" + +#include <linux/rwsem.h> + +#include <asm/tlb.h> + +/* + * Check to see if the address rid:va is translated by the TLB + */ + +static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va) +{ + return ((trp->p) && (trp->rid == rid) + && ((va-trp->vadr) < PSIZE(trp->ps))); +} + +/* + * Only for GUEST TR format. + */ +static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva) +{ + u64 sa1, ea1; + + if (!trp->p || trp->rid != rid) + return 0; + + sa1 = trp->vadr; + ea1 = sa1 + PSIZE(trp->ps) - 1; + eva -= 1; + if ((sva > ea1) || (sa1 > eva)) + return 0; + else + return 1; + +} + +void machine_tlb_purge(u64 va, u64 ps) +{ + ia64_ptcl(va, ps << 2); +} + +void local_flush_tlb_all(void) +{ + int i, j; + unsigned long flags, count0, count1; + unsigned long stride0, stride1, addr; + + addr = current_vcpu->arch.ptce_base; + count0 = current_vcpu->arch.ptce_count[0]; + count1 = current_vcpu->arch.ptce_count[1]; + stride0 = current_vcpu->arch.ptce_stride[0]; + stride1 = current_vcpu->arch.ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref) +{ + union ia64_rr vrr; + union ia64_pta vpta; + struct ia64_psr vpsr; + + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + vrr.val = vcpu_get_rr(vcpu, vadr); + vpta.val = vcpu_get_pta(vcpu); + + if (vrr.ve & vpta.ve) { + switch (ref) { + case DATA_REF: + case NA_REF: + return vpsr.dt; + case INST_REF: + return vpsr.dt && vpsr.it && vpsr.ic; + case RSE_REF: + return vpsr.dt && vpsr.rt; + + } + } + return 0; +} + +struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag) +{ + u64 index, pfn, rid, pfn_bits; + + pfn_bits = vpta.size - 5 - 8; + pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr); + rid = _REGION_ID(vrr); + index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1)); + *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16); + + return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) + + (index << 5)); +} + +struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type) +{ + + struct thash_data *trp; + int i; + u64 rid; + + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK;; + if (type == D_TLB) { + if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; + i < NDTRS; i++, trp++) { + if (__is_tr_translated(trp, rid, va)) + return trp; + } + } + } else { + if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; + i < NITRS; i++, trp++) { + if (__is_tr_translated(trp, rid, va)) + return trp; + } + } + } + + return NULL; +} + +static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte) +{ + union ia64_rr rr; + struct thash_data *head; + unsigned long ps, gpaddr; + + ps = itir_ps(itir); + + gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) | + (ifa & ((1UL << ps) - 1)); + + rr.val = ia64_get_rr(ifa); + head = (struct thash_data *)ia64_thash(ifa); + head->etag = INVALID_TI_TAG; + ia64_mf(); + head->page_flags = pte & ~PAGE_FLAGS_RV_MASK; + head->itir = rr.ps << 2; + head->etag = ia64_ttag(ifa); + head->gpaddr = gpaddr; +} + +void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) +{ + u64 i, dirty_pages = 1; + u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; + spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); + void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) + + KVM_MEM_DIRTY_LOG_OFS; + dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; + + vmm_spin_lock(lock); + for (i = 0; i < dirty_pages; i++) { + /* avoid RMW */ + if (!test_bit(base_gfn + i, dirty_bitmap)) + set_bit(base_gfn + i , dirty_bitmap); + } + vmm_spin_unlock(lock); +} + +void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type) +{ + u64 phy_pte, psr; + union ia64_rr mrr; + + mrr.val = ia64_get_rr(va); + phy_pte = translate_phy_pte(&pte, itir, va); + + if (itir_ps(itir) >= mrr.ps) { + vhpt_insert(phy_pte, itir, va, pte); + } else { + phy_pte &= ~PAGE_FLAGS_RV_MASK; + psr = ia64_clear_ic(); + ia64_itc(type, va, phy_pte, itir_ps(itir)); + ia64_set_psr(psr); + } + + if (!(pte&VTLB_PTE_IO)) + mark_pages_dirty(v, pte, itir_ps(itir)); +} + +/* + * vhpt lookup + */ +struct thash_data *vhpt_lookup(u64 va) +{ + struct thash_data *head; + u64 tag; + + head = (struct thash_data *)ia64_thash(va); + tag = ia64_ttag(va); + if (head->etag == tag) + return head; + return NULL; +} + +u64 guest_vhpt_lookup(u64 iha, u64 *pte) +{ + u64 ret; + struct thash_data *data; + + data = __vtr_lookup(current_vcpu, iha, D_TLB); + if (data != NULL) + thash_vhpt_insert(current_vcpu, data->page_flags, + data->itir, iha, D_TLB); + + asm volatile ("rsm psr.ic|psr.i;;" + "srlz.d;;" + "ld8.s r9=[%1];;" + "tnat.nz p6,p7=r9;;" + "(p6) mov %0=1;" + "(p6) mov r9=r0;" + "(p7) extr.u r9=r9,0,53;;" + "(p7) mov %0=r0;" + "(p7) st8 [%2]=r9;;" + "ssm psr.ic;;" + "srlz.d;;" + /* "ssm psr.i;;" Once interrupts in vmm open, need fix*/ + : "=r"(ret) : "r"(iha), "r"(pte):"memory"); + + return ret; +} + +/* + * purge software guest tlb + */ + +static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps) +{ + struct thash_data *cur; + u64 start, curadr, size, psbits, tag, rr_ps, num; + union ia64_rr vrr; + struct thash_cb *hcb = &v->arch.vtlb; + + vrr.val = vcpu_get_rr(v, va); + psbits = VMX(v, psbits[(va >> 61)]); + start = va & ~((1UL << ps) - 1); + while (psbits) { + curadr = start; + rr_ps = __ffs(psbits); + psbits &= ~(1UL << rr_ps); + num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps)); + size = PSIZE(rr_ps); + vrr.ps = rr_ps; + while (num) { + cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag); + if (cur->etag == tag && cur->ps == rr_ps) + cur->etag = INVALID_TI_TAG; + curadr += size; + num--; + } + } +} + + +/* + * purge VHPT and machine TLB + */ +static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps) +{ + struct thash_data *cur; + u64 start, size, tag, num; + union ia64_rr rr; + + start = va & ~((1UL << ps) - 1); + rr.val = ia64_get_rr(va); + size = PSIZE(rr.ps); + num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps)); + while (num) { + cur = (struct thash_data *)ia64_thash(start); + tag = ia64_ttag(start); + if (cur->etag == tag) + cur->etag = INVALID_TI_TAG; + start += size; + num--; + } + machine_tlb_purge(va, ps); +} + +/* + * Insert an entry into hash TLB or VHPT. + * NOTES: + * 1: When inserting VHPT to thash, "va" is a must covered + * address by the inserted machine VHPT entry. + * 2: The format of entry is always in TLB. + * 3: The caller need to make sure the new entry will not overlap + * with any existed entry. + */ +void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va) +{ + struct thash_data *head; + union ia64_rr vrr; + u64 tag; + struct thash_cb *hcb = &v->arch.vtlb; + + vrr.val = vcpu_get_rr(v, va); + vrr.ps = itir_ps(itir); + VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps); + head = vsa_thash(hcb->pta, va, vrr.val, &tag); + head->page_flags = pte; + head->itir = itir; + head->etag = tag; +} + +int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type) +{ + struct thash_data *trp; + int i; + u64 end, rid; + + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK; + end = va + PSIZE(ps); + if (type == D_TLB) { + if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; + i < NDTRS; i++, trp++) { + if (__is_tr_overlap(trp, rid, va, end)) + return i; + } + } + } else { + if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; + i < NITRS; i++, trp++) { + if (__is_tr_overlap(trp, rid, va, end)) + return i; + } + } + } + return -1; +} + +/* + * Purge entries in VTLB and VHPT + */ +void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps) +{ + if (vcpu_quick_region_check(v->arch.tc_regions, va)) + vtlb_purge(v, va, ps); + vhpt_purge(v, va, ps); +} + +void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps) +{ + u64 old_va = va; + va = REGION_OFFSET(va); + if (vcpu_quick_region_check(v->arch.tc_regions, old_va)) + vtlb_purge(v, va, ps); + vhpt_purge(v, va, ps); +} + +u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) +{ + u64 ps, ps_mask, paddr, maddr; + union pte_flags phy_pte; + + ps = itir_ps(itir); + ps_mask = ~((1UL << ps) - 1); + phy_pte.val = *pte; + paddr = *pte; + paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); + maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT); + if (maddr & GPFN_IO_MASK) { + *pte |= VTLB_PTE_IO; + return -1; + } + maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | + (paddr & ~PAGE_MASK); + phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT; + return phy_pte.val; +} + +/* + * Purge overlap TCs and then insert the new entry to emulate itc ops. + * Notes: Only TC entry can purge and insert. + * 1 indicates this is MMIO + */ +int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, + u64 ifa, int type) +{ + u64 ps; + u64 phy_pte; + union ia64_rr vrr, mrr; + int ret = 0; + + ps = itir_ps(itir); + vrr.val = vcpu_get_rr(v, ifa); + mrr.val = ia64_get_rr(ifa); + + phy_pte = translate_phy_pte(&pte, itir, ifa); + + /* Ensure WB attribute if pte is related to a normal mem page, + * which is required by vga acceleration since qemu maps shared + * vram buffer with WB. + */ + if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) { + pte &= ~_PAGE_MA_MASK; + phy_pte &= ~_PAGE_MA_MASK; + } + + if (pte & VTLB_PTE_IO) + ret = 1; + + vtlb_purge(v, ifa, ps); + vhpt_purge(v, ifa, ps); + + if (ps == mrr.ps) { + if (!(pte&VTLB_PTE_IO)) { + vhpt_insert(phy_pte, itir, ifa, pte); + } else { + vtlb_insert(v, pte, itir, ifa); + vcpu_quick_region_set(VMX(v, tc_regions), ifa); + } + } else if (ps > mrr.ps) { + vtlb_insert(v, pte, itir, ifa); + vcpu_quick_region_set(VMX(v, tc_regions), ifa); + if (!(pte&VTLB_PTE_IO)) + vhpt_insert(phy_pte, itir, ifa, pte); + } else { + u64 psr; + phy_pte &= ~PAGE_FLAGS_RV_MASK; + psr = ia64_clear_ic(); + ia64_itc(type, ifa, phy_pte, ps); + ia64_set_psr(psr); + } + if (!(pte&VTLB_PTE_IO)) + mark_pages_dirty(v, pte, ps); + + return ret; +} + +/* + * Purge all TCs or VHPT entries including those in Hash table. + * + */ + +void thash_purge_all(struct kvm_vcpu *v) +{ + int i; + struct thash_data *head; + struct thash_cb *vtlb, *vhpt; + vtlb = &v->arch.vtlb; + vhpt = &v->arch.vhpt; + + for (i = 0; i < 8; i++) + VMX(v, psbits[i]) = 0; + + head = vtlb->hash; + for (i = 0; i < vtlb->num; i++) { + head->page_flags = 0; + head->etag = INVALID_TI_TAG; + head->itir = 0; + head->next = 0; + head++; + }; + + head = vhpt->hash; + for (i = 0; i < vhpt->num; i++) { + head->page_flags = 0; + head->etag = INVALID_TI_TAG; + head->itir = 0; + head->next = 0; + head++; + }; + + local_flush_tlb_all(); +} + + +/* + * Lookup the hash table and its collision chain to find an entry + * covering this address rid:va or the entry. + * + * INPUT: + * in: TLB format for both VHPT & TLB. + */ + +struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) +{ + struct thash_data *cch; + u64 psbits, ps, tag; + union ia64_rr vrr; + + struct thash_cb *hcb = &v->arch.vtlb; + + cch = __vtr_lookup(v, va, is_data);; + if (cch) + return cch; + + if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0) + return NULL; + + psbits = VMX(v, psbits[(va >> 61)]); + vrr.val = vcpu_get_rr(v, va); + while (psbits) { + ps = __ffs(psbits); + psbits &= ~(1UL << ps); + vrr.ps = ps; + cch = vsa_thash(hcb->pta, va, vrr.val, &tag); + if (cch->etag == tag && cch->ps == ps) + return cch; + } + + return NULL; +} + + +/* + * Initialize internal control data before service. + */ +void thash_init(struct thash_cb *hcb, u64 sz) +{ + int i; + struct thash_data *head; + + hcb->pta.val = (unsigned long)hcb->hash; + hcb->pta.vf = 1; + hcb->pta.ve = 1; + hcb->pta.size = sz; + head = hcb->hash; + for (i = 0; i < hcb->num; i++) { + head->page_flags = 0; + head->itir = 0; + head->etag = INVALID_TI_TAG; + head->next = 0; + head++; + } +} + +u64 kvm_lookup_mpa(u64 gpfn) +{ + u64 *base = (u64 *) KVM_P2M_BASE; + return *(base + gpfn); +} + +u64 kvm_gpa_to_mpa(u64 gpa) +{ + u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT); + return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK); +} + + +/* + * Fetch guest bundle code. + * INPUT: + * gip: guest ip + * pbundle: used to return fetched bundle. + */ +int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle) +{ + u64 gpip = 0; /* guest physical IP*/ + u64 *vpa; + struct thash_data *tlb; + u64 maddr; + + if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) { + /* I-side physical mode */ + gpip = gip; + } else { + tlb = vtlb_lookup(vcpu, gip, I_TLB); + if (tlb) + gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) | + (gip & (PSIZE(tlb->ps) - 1)); + } + if (gpip) { + maddr = kvm_gpa_to_mpa(gpip); + } else { + tlb = vhpt_lookup(gip); + if (tlb == NULL) { + ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2); + return IA64_FAULT; + } + maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) + | (gip & (PSIZE(tlb->ps) - 1)); + } + vpa = (u64 *)__kvm_va(maddr); + + pbundle->i64[0] = *vpa++; + pbundle->i64[1] = *vpa; + + return IA64_NO_FAULT; +} + + +void kvm_init_vhpt(struct kvm_vcpu *v) +{ + v->arch.vhpt.num = VHPT_NUM_ENTRIES; + thash_init(&v->arch.vhpt, VHPT_SHIFT); + ia64_set_pta(v->arch.vhpt.pta.val); + /*Enable VHPT here?*/ +} + +void kvm_init_vtlb(struct kvm_vcpu *v) +{ + v->arch.vtlb.num = VTLB_NUM_ENTRIES; + thash_init(&v->arch.vtlb, VTLB_SHIFT); +} diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 5c1de53c8c1..fc6c6636ffd 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -682,15 +682,6 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -void online_page(struct page *page) -{ - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalram_pages++; - num_physpages++; -} - int arch_add_memory(int nid, u64 start, u64 size) { pg_data_t *pgdat; diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c index 2b412454cb4..ded7dd2f67b 100644 --- a/arch/m68k/kernel/ints.c +++ b/arch/m68k/kernel/ints.c @@ -186,7 +186,7 @@ int setup_irq(unsigned int irq, struct irq_node *node) if (irq >= NR_IRQS || !(contr = irq_controller[irq])) { printk("%s: Incorrect IRQ %d from %s\n", - __FUNCTION__, irq, node->devname); + __func__, irq, node->devname); return -ENXIO; } @@ -249,7 +249,7 @@ void free_irq(unsigned int irq, void *dev_id) unsigned long flags; if (irq >= NR_IRQS || !(contr = irq_controller[irq])) { - printk("%s: Incorrect IRQ %d\n", __FUNCTION__, irq); + printk("%s: Incorrect IRQ %d\n", __func__, irq); return; } @@ -267,7 +267,7 @@ void free_irq(unsigned int irq, void *dev_id) node->handler = NULL; } else printk("%s: Removing probably wrong IRQ %d\n", - __FUNCTION__, irq); + __func__, irq); if (!irq_list[irq]) { if (contr->shutdown) @@ -288,7 +288,7 @@ void enable_irq(unsigned int irq) if (irq >= NR_IRQS || !(contr = irq_controller[irq])) { printk("%s: Incorrect IRQ %d\n", - __FUNCTION__, irq); + __func__, irq); return; } @@ -312,7 +312,7 @@ void disable_irq(unsigned int irq) if (irq >= NR_IRQS || !(contr = irq_controller[irq])) { printk("%s: Incorrect IRQ %d\n", - __FUNCTION__, irq); + __func__, irq); return; } diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c index 50603d3dce8..3c943d2ec57 100644 --- a/arch/m68k/mac/oss.c +++ b/arch/m68k/mac/oss.c @@ -190,7 +190,7 @@ void oss_irq_enable(int irq) { break; #ifdef DEBUG_IRQUSE default: - printk("%s unknown irq %d\n",__FUNCTION__, irq); + printk("%s unknown irq %d\n", __func__, irq); break; #endif } @@ -230,7 +230,7 @@ void oss_irq_disable(int irq) { break; #ifdef DEBUG_IRQUSE default: - printk("%s unknown irq %d\n", __FUNCTION__, irq); + printk("%s unknown irq %d\n", __func__, irq); break; #endif } diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index f42caa79e4e..a2bb01f5964 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -79,7 +79,6 @@ void show_mem(void) printk("\nMem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { struct page *page = pgdat->node_mem_map + i; diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c index 46161cef08b..9f0e3d59bf9 100644 --- a/arch/m68k/q40/q40ints.c +++ b/arch/m68k/q40/q40ints.c @@ -47,7 +47,7 @@ static int q40_irq_startup(unsigned int irq) switch (irq) { case 1: case 2: case 8: case 9: case 11: case 12: case 13: - printk("%s: ISA IRQ %d not implemented by HW\n", __FUNCTION__, irq); + printk("%s: ISA IRQ %d not implemented by HW\n", __func__, irq); return -ENXIO; } return 0; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8724ed3298d..e5a7c5d9636 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -81,7 +81,9 @@ config MIPS_COBALT config MACH_DECSTATION bool "DECstations" select BOOT_ELF32 + select CEVT_DS1287 select CEVT_R4K + select CSRC_IOASIC select CSRC_R4K select CPU_DADDI_WORKAROUNDS if 64BIT select CPU_R4000_WORKAROUNDS if 64BIT @@ -221,6 +223,7 @@ config MIPS_MALTA select DMA_NONCOHERENT select GENERIC_ISA_DMA select IRQ_CPU + select IRQ_GIC select HW_HAS_PCI select I8253 select I8259 @@ -309,12 +312,12 @@ config MACH_VR41XX select GENERIC_HARDIRQS_NO__DO_IRQ config PNX8550_JBS - bool "Philips PNX8550 based JBS board" + bool "NXP PNX8550 based JBS board" select PNX8550 select SYS_SUPPORTS_LITTLE_ENDIAN config PNX8550_STB810 - bool "Philips PNX8550 based STB810 board" + bool "NXP PNX8550 based STB810 board" select PNX8550 select SYS_SUPPORTS_LITTLE_ENDIAN @@ -612,6 +615,7 @@ config TOSHIBA_JMR3927 select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_BIG_ENDIAN select GENERIC_HARDIRQS_NO__DO_IRQ + select GPIO_TXX9 config TOSHIBA_RBTX4927 bool "Toshiba RBTX49[23]7 board" @@ -653,7 +657,7 @@ config TOSHIBA_RBTX4938 select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_KGDB select GENERIC_HARDIRQS_NO__DO_IRQ - select GENERIC_GPIO + select GPIO_TXX9 help This Toshiba board is based on the TX4938 processor. Say Y here to support this machine type @@ -767,6 +771,9 @@ config BOOT_RAW config CEVT_BCM1480 bool +config CEVT_DS1287 + bool + config CEVT_GT641XX bool @@ -782,12 +789,20 @@ config CEVT_TXX9 config CSRC_BCM1480 bool +config CSRC_IOASIC + bool + config CSRC_R4K bool config CSRC_SB1250 bool +config GPIO_TXX9 + select GENERIC_GPIO + select HAVE_GPIO_LIB + bool + config CFE bool @@ -840,6 +855,9 @@ config MIPS_NILE4 config MIPS_DISABLE_OBSOLETE_IDE bool +config SYNC_R4K + bool + config NO_IOPORT def_bool n @@ -909,6 +927,9 @@ config IRQ_TXX9 config IRQ_GT641XX bool +config IRQ_GIC + bool + config MIPS_BOARDS_GEN bool @@ -1811,6 +1832,17 @@ config NR_CPUS performance should round up your number of processors to the next power of two. +config MIPS_CMP + bool "MIPS CMP framework support" + depends on SMP + select SYNC_R4K + select SYS_SUPPORTS_SCHED_SMT + select WEAK_ORDERING + default n + help + This is a placeholder option for the GCMP work. It will need to + be handled differently... + source "kernel/time/Kconfig" # diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index fd7124c1b75..f18cf92650e 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -73,14 +73,4 @@ config RUNTIME_DEBUG include/asm-mips/debug.h for debuging macros. If unsure, say N. -config MIPS_UNCACHED - bool "Run uncached" - depends on DEBUG_KERNEL && !SMP && !SGI_IP27 - help - If you say Y here there kernel will disable all CPU caches. This will - reduce the system's performance dramatically but can help finding - otherwise hard to track bugs. It can also useful if you're doing - hardware debugging with a logic analyzer and need to see all traffic - on the bus. - endmenu diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 1c62381f5c2..69648d01acc 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -410,21 +410,21 @@ load-$(CONFIG_CASIO_E55) += 0xffffffff80004000 load-$(CONFIG_TANBAC_TB022X) += 0xffffffff80000000 # -# Common Philips PNX8550 +# Common NXP PNX8550 # -core-$(CONFIG_SOC_PNX8550) += arch/mips/philips/pnx8550/common/ +core-$(CONFIG_SOC_PNX8550) += arch/mips/nxp/pnx8550/common/ cflags-$(CONFIG_SOC_PNX8550) += -Iinclude/asm-mips/mach-pnx8550 # -# Philips PNX8550 JBS board +# NXP PNX8550 JBS board # -libs-$(CONFIG_PNX8550_JBS) += arch/mips/philips/pnx8550/jbs/ +libs-$(CONFIG_PNX8550_JBS) += arch/mips/nxp/pnx8550/jbs/ #cflags-$(CONFIG_PNX8550_JBS) += -Iinclude/asm-mips/mach-pnx8550 load-$(CONFIG_PNX8550_JBS) += 0xffffffff80060000 -# Philips PNX8550 STB810 board +# NXP PNX8550 STB810 board # -libs-$(CONFIG_PNX8550_STB810) += arch/mips/philips/pnx8550/stb810/ +libs-$(CONFIG_PNX8550_STB810) += arch/mips/nxp/pnx8550/stb810/ load-$(CONFIG_PNX8550_STB810) += 0xffffffff80060000 # NEC EMMA2RH boards diff --git a/arch/mips/au1000/common/cputable.c b/arch/mips/au1000/common/cputable.c index 5c0d35d6e22..8c93a05d738 100644 --- a/arch/mips/au1000/common/cputable.c +++ b/arch/mips/au1000/common/cputable.c @@ -11,10 +11,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <linux/string.h> -#include <linux/sched.h> -#include <linux/threads.h> -#include <linux/init.h> + #include <asm/mach-au1x00/au1000.h> struct cpu_spec* cur_cpu_spec[NR_CPUS]; diff --git a/arch/mips/au1000/common/dbdma.c b/arch/mips/au1000/common/dbdma.c index 57f17b41098..53377dfc064 100644 --- a/arch/mips/au1000/common/dbdma.c +++ b/arch/mips/au1000/common/dbdma.c @@ -31,18 +31,12 @@ */ #include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/delay.h> #include <linux/interrupt.h> #include <linux/module.h> #include <asm/mach-au1x00/au1000.h> #include <asm/mach-au1x00/au1xxx_dbdma.h> -#include <asm/system.h> - #if defined(CONFIG_SOC_AU1550) || defined(CONFIG_SOC_AU1200) diff --git a/arch/mips/au1000/common/dbg_io.c b/arch/mips/au1000/common/dbg_io.c index 79e0b0a51ac..eae1bb2ca26 100644 --- a/arch/mips/au1000/common/dbg_io.c +++ b/arch/mips/au1000/common/dbg_io.c @@ -1,5 +1,4 @@ -#include <asm/io.h> #include <asm/mach-au1x00/au1000.h> #ifdef CONFIG_KGDB @@ -55,8 +54,7 @@ typedef unsigned int uint32; #define UART16550_READ(y) (au_readl(DEBUG_BASE + y) & 0xff) #define UART16550_WRITE(y, z) (au_writel(z&0xff, DEBUG_BASE + y)) -extern unsigned long get_au1x00_uart_baud_base(void); -extern unsigned long cal_r4koff(void); +extern unsigned long calc_clock(void); void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop) { @@ -64,7 +62,7 @@ void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop) if (UART16550_READ(UART_MOD_CNTRL) != 0x3) { UART16550_WRITE(UART_MOD_CNTRL, 3); } - cal_r4koff(); + calc_clock(); /* disable interrupts */ UART16550_WRITE(UART_IER, 0); diff --git a/arch/mips/au1000/common/dma.c b/arch/mips/au1000/common/dma.c index c78260d4e83..95f69ea146e 100644 --- a/arch/mips/au1000/common/dma.c +++ b/arch/mips/au1000/common/dma.c @@ -33,12 +33,9 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/errno.h> -#include <linux/sched.h> #include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/delay.h> #include <linux/interrupt.h> -#include <asm/system.h> + #include <asm/mach-au1x00/au1000.h> #include <asm/mach-au1x00/au1000_dma.h> diff --git a/arch/mips/au1000/common/gpio.c b/arch/mips/au1000/common/gpio.c index 0b658f1db4c..52545258997 100644 --- a/arch/mips/au1000/common/gpio.c +++ b/arch/mips/au1000/common/gpio.c @@ -27,13 +27,8 @@ * others have a second one : GPIO2 */ -#include <linux/init.h> -#include <linux/io.h> -#include <linux/types.h> #include <linux/module.h> -#include <asm/addrspace.h> - #include <asm/mach-au1x00/au1000.h> #include <asm/gpio.h> diff --git a/arch/mips/au1000/common/irq.c b/arch/mips/au1000/common/irq.c index 3c7714f057a..f0626992fd7 100644 --- a/arch/mips/au1000/common/irq.c +++ b/arch/mips/au1000/common/irq.c @@ -1,7 +1,6 @@ /* - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com + * Copyright 2001, 2007-2008 MontaVista Software Inc. + * Author: MontaVista Software, Inc. <source@mvista.com> * * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) * @@ -27,7 +26,6 @@ */ #include <linux/bitops.h> #include <linux/init.h> -#include <linux/io.h> #include <linux/interrupt.h> #include <linux/irq.h> @@ -591,7 +589,7 @@ void __init arch_init_irq(void) imp++; } - set_c0_status(ALLINTS); + set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 | IE_IRQ3 | IE_IRQ4); /* Board specific IRQ initialization. */ diff --git a/arch/mips/au1000/common/pci.c b/arch/mips/au1000/common/pci.c index ce771487567..7e966b31e3e 100644 --- a/arch/mips/au1000/common/pci.c +++ b/arch/mips/au1000/common/pci.c @@ -30,7 +30,7 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/types.h> + #include <linux/pci.h> #include <linux/kernel.h> #include <linux/init.h> diff --git a/arch/mips/au1000/common/platform.c b/arch/mips/au1000/common/platform.c index 39d68126529..31d2a227087 100644 --- a/arch/mips/au1000/common/platform.c +++ b/arch/mips/au1000/common/platform.c @@ -3,18 +3,65 @@ * * Copyright 2004, Matt Porter <mporter@kernel.crashing.org> * + * (C) Copyright Embedded Alley Solutions, Inc 2005 + * Author: Pantelis Antoniou <pantelis@embeddedalley.com> + * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any * warranty of any kind, whether express or implied. */ -#include <linux/device.h> + #include <linux/platform_device.h> -#include <linux/kernel.h> +#include <linux/serial_8250.h> #include <linux/init.h> -#include <linux/resource.h> #include <asm/mach-au1x00/au1xxx.h> +#define PORT(_base, _irq) \ + { \ + .iobase = _base, \ + .membase = (void __iomem *)_base,\ + .mapbase = CPHYSADDR(_base), \ + .irq = _irq, \ + .regshift = 2, \ + .iotype = UPIO_AU, \ + .flags = UPF_SKIP_TEST \ + } + +static struct plat_serial8250_port au1x00_uart_data[] = { +#if defined(CONFIG_SERIAL_8250_AU1X00) +#if defined(CONFIG_SOC_AU1000) + PORT(UART0_ADDR, AU1000_UART0_INT), + PORT(UART1_ADDR, AU1000_UART1_INT), + PORT(UART2_ADDR, AU1000_UART2_INT), + PORT(UART3_ADDR, AU1000_UART3_INT), +#elif defined(CONFIG_SOC_AU1500) + PORT(UART0_ADDR, AU1500_UART0_INT), + PORT(UART3_ADDR, AU1500_UART3_INT), +#elif defined(CONFIG_SOC_AU1100) + PORT(UART0_ADDR, AU1100_UART0_INT), + PORT(UART1_ADDR, AU1100_UART1_INT), + PORT(UART3_ADDR, AU1100_UART3_INT), +#elif defined(CONFIG_SOC_AU1550) + PORT(UART0_ADDR, AU1550_UART0_INT), + PORT(UART1_ADDR, AU1550_UART1_INT), + PORT(UART3_ADDR, AU1550_UART3_INT), +#elif defined(CONFIG_SOC_AU1200) + PORT(UART0_ADDR, AU1200_UART0_INT), + PORT(UART1_ADDR, AU1200_UART1_INT), +#endif +#endif /* CONFIG_SERIAL_8250_AU1X00 */ + { }, +}; + +static struct platform_device au1xx0_uart_device = { + .name = "serial8250", + .id = PLAT8250_DEV_AU1X00, + .dev = { + .platform_data = au1x00_uart_data, + }, +}; + /* OHCI (USB full speed host controller) */ static struct resource au1xxx_usb_ohci_resources[] = { [0] = { @@ -186,19 +233,6 @@ static struct resource au1200_lcd_resources[] = { } }; -static struct resource au1200_ide0_resources[] = { - [0] = { - .start = AU1XXX_ATA_PHYS_ADDR, - .end = AU1XXX_ATA_PHYS_ADDR + AU1XXX_ATA_PHYS_LEN - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AU1XXX_ATA_INT, - .end = AU1XXX_ATA_INT, - .flags = IORESOURCE_IRQ, - } -}; - static u64 au1200_lcd_dmamask = ~(u32)0; static struct platform_device au1200_lcd_device = { @@ -212,20 +246,6 @@ static struct platform_device au1200_lcd_device = { .resource = au1200_lcd_resources, }; - -static u64 ide0_dmamask = ~(u32)0; - -static struct platform_device au1200_ide0_device = { - .name = "au1200-ide", - .id = 0, - .dev = { - .dma_mask = &ide0_dmamask, - .coherent_dma_mask = 0xffffffff, - }, - .num_resources = ARRAY_SIZE(au1200_ide0_resources), - .resource = au1200_ide0_resources, -}; - static u64 au1xxx_mmc_dmamask = ~(u32)0; static struct platform_device au1xxx_mmc_device = { @@ -245,31 +265,6 @@ static struct platform_device au1x00_pcmcia_device = { .id = 0, }; -#ifdef CONFIG_MIPS_DB1200 - -static struct resource smc91x_resources[] = { - [0] = { - .name = "smc91x-regs", - .start = AU1XXX_SMC91111_PHYS_ADDR, - .end = AU1XXX_SMC91111_PHYS_ADDR + 0xfffff, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AU1XXX_SMC91111_IRQ, - .end = AU1XXX_SMC91111_IRQ, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device smc91x_device = { - .name = "smc91x", - .id = -1, - .num_resources = ARRAY_SIZE(smc91x_resources), - .resource = smc91x_resources, -}; - -#endif - /* All Alchemy demoboards with I2C have this #define in their headers */ #ifdef SMBUS_PSC_BASE static struct resource pbdb_smbus_resources[] = { @@ -289,6 +284,7 @@ static struct platform_device pbdb_smbus_device = { #endif static struct platform_device *au1xxx_platform_devices[] __initdata = { + &au1xx0_uart_device, &au1xxx_usb_ohci_device, &au1x00_pcmcia_device, #ifdef CONFIG_FB_AU1100 @@ -299,12 +295,8 @@ static struct platform_device *au1xxx_platform_devices[] __initdata = { &au1xxx_usb_gdt_device, &au1xxx_usb_otg_device, &au1200_lcd_device, - &au1200_ide0_device, &au1xxx_mmc_device, #endif -#ifdef CONFIG_MIPS_DB1200 - &smc91x_device, -#endif #ifdef SMBUS_PSC_BASE &pbdb_smbus_device, #endif @@ -312,6 +304,13 @@ static struct platform_device *au1xxx_platform_devices[] __initdata = { int __init au1xxx_platform_init(void) { + unsigned int uartclk = get_au1x00_uart_baud_base() * 16; + int i; + + /* Fill up uartclk. */ + for (i = 0; au1x00_uart_data[i].flags ; i++) + au1x00_uart_data[i].uartclk = uartclk; + return platform_add_devices(au1xxx_platform_devices, ARRAY_SIZE(au1xxx_platform_devices)); } diff --git a/arch/mips/au1000/common/power.c b/arch/mips/au1000/common/power.c index 54047d69b82..812a5f8b7d2 100644 --- a/arch/mips/au1000/common/power.c +++ b/arch/mips/au1000/common/power.c @@ -29,17 +29,14 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> #include <linux/pm.h> #include <linux/pm_legacy.h> -#include <linux/slab.h> #include <linux/sysctl.h> #include <linux/jiffies.h> -#include <asm/string.h> #include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/system.h> #include <asm/cacheflush.h> #include <asm/mach-au1x00/au1000.h> @@ -47,17 +44,13 @@ #define DEBUG 1 #ifdef DEBUG -# define DPRINTK(fmt, args...) printk("%s: " fmt, __FUNCTION__ , ## args) +# define DPRINTK(fmt, args...) printk("%s: " fmt, __func__, ## args) #else # define DPRINTK(fmt, args...) #endif static void au1000_calibrate_delay(void); -extern void set_au1x00_speed(unsigned int new_freq); -extern unsigned int get_au1x00_speed(void); -extern unsigned long get_au1x00_uart_baud_base(void); -extern void set_au1x00_uart_baud_base(unsigned long new_baud_base); extern unsigned long save_local_and_disable(int controller); extern void restore_local_and_enable(int controller, unsigned long mask); extern void local_enable_irq(unsigned int irq_nr); diff --git a/arch/mips/au1000/common/prom.c b/arch/mips/au1000/common/prom.c index 90d70695aa6..f10af829e4e 100644 --- a/arch/mips/au1000/common/prom.c +++ b/arch/mips/au1000/common/prom.c @@ -33,8 +33,8 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/module.h> -#include <linux/kernel.h> #include <linux/init.h> #include <linux/string.h> diff --git a/arch/mips/au1000/common/puts.c b/arch/mips/au1000/common/puts.c index 2705829cd46..e34c67e8929 100644 --- a/arch/mips/au1000/common/puts.c +++ b/arch/mips/au1000/common/puts.c @@ -28,7 +28,6 @@ * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/types.h> #include <asm/mach-au1x00/au1000.h> #define SERIAL_BASE UART_BASE diff --git a/arch/mips/au1000/common/reset.c b/arch/mips/au1000/common/reset.c index b8638d293cf..60cec537c74 100644 --- a/arch/mips/au1000/common/reset.c +++ b/arch/mips/au1000/common/reset.c @@ -27,13 +27,7 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/reboot.h> -#include <asm/system.h> + #include <asm/mach-au1x00/au1000.h> extern int au_sleep(void); diff --git a/arch/mips/au1000/common/setup.c b/arch/mips/au1000/common/setup.c index 9e4ab80caab..0e86f7a6b4a 100644 --- a/arch/mips/au1000/common/setup.c +++ b/arch/mips/au1000/common/setup.c @@ -25,21 +25,14 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/sched.h> #include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/delay.h> -#include <linux/interrupt.h> #include <linux/module.h> #include <linux/pm.h> -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/irq.h> #include <asm/mipsregs.h> #include <asm/reboot.h> -#include <asm/pgtable.h> #include <asm/time.h> #include <au1000.h> @@ -49,8 +42,6 @@ extern void __init board_setup(void); extern void au1000_restart(char *); extern void au1000_halt(void); extern void au1000_power_off(void); -extern void au1x_time_init(void); -extern void au1x_timer_setup(struct irqaction *irq); extern void set_cpuspec(void); void __init plat_mem_setup(void) diff --git a/arch/mips/au1000/common/sleeper.S b/arch/mips/au1000/common/sleeper.S index 683d9da84b6..4b3cf021a45 100644 --- a/arch/mips/au1000/common/sleeper.S +++ b/arch/mips/au1000/common/sleeper.S @@ -9,9 +9,9 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ + #include <asm/asm.h> #include <asm/mipsregs.h> -#include <asm/addrspace.h> #include <asm/regdef.h> #include <asm/stackframe.h> diff --git a/arch/mips/au1000/common/time.c b/arch/mips/au1000/common/time.c index e122bbc6cd8..bdb6d73b26f 100644 --- a/arch/mips/au1000/common/time.c +++ b/arch/mips/au1000/common/time.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2001 MontaVista Software, ppopov@mvista.com + * Copyright (C) 2001, 2006, 2008 MontaVista Software, <source@mvista.com> * Copied and modified Carsten Langgaard's time.c * * Carsten Langgaard, carstenl@mips.com @@ -34,23 +34,13 @@ #include <linux/types.h> #include <linux/init.h> -#include <linux/kernel_stat.h> -#include <linux/sched.h> #include <linux/spinlock.h> -#include <linux/hardirq.h> -#include <asm/compiler.h> #include <asm/mipsregs.h> #include <asm/time.h> -#include <asm/div64.h> #include <asm/mach-au1x00/au1000.h> -#include <linux/mc146818rtc.h> -#include <linux/timex.h> - -static unsigned long r4k_offset; /* Amount to increment compare reg each time */ -static unsigned long r4k_cur; /* What counter should be at next timer irq */ -int no_au1xxx_32khz; +static int no_au1xxx_32khz; extern int allow_au1k_wait; /* default off for CP0 Counter */ #ifdef CONFIG_PM @@ -184,7 +174,7 @@ wakeup_counter0_set(int ticks) * "wait" is enabled, and we need to detect if the 32KHz isn't present * but requested......got it? :-) -- Dan */ -unsigned long cal_r4koff(void) +unsigned long calc_clock(void) { unsigned long cpu_speed; unsigned long flags; @@ -229,19 +219,13 @@ unsigned long cal_r4koff(void) // Equation: Baudrate = CPU / (SD * 2 * CLKDIV * 16) set_au1x00_uart_baud_base(cpu_speed / (2 * ((int)(au_readl(SYS_POWERCTRL)&0x03) + 2) * 16)); spin_unlock_irqrestore(&time_lock, flags); - return (cpu_speed / HZ); + return cpu_speed; } void __init plat_time_init(void) { - unsigned int est_freq; - - printk("calculating r4koff... "); - r4k_offset = cal_r4koff(); - printk("%08lx(%d)\n", r4k_offset, (int) r4k_offset); + unsigned int est_freq = calc_clock(); - //est_freq = 2*r4k_offset*HZ; - est_freq = r4k_offset*HZ; est_freq += 5000; /* round */ est_freq -= est_freq%10000; printk("CPU frequency %d.%02d MHz\n", est_freq/1000000, @@ -249,9 +233,6 @@ void __init plat_time_init(void) set_au1x00_speed(est_freq); set_au1x00_lcd_clock(); // program the LCD clock - r4k_cur = (read_c0_count() + r4k_offset); - write_c0_compare(r4k_cur); - #ifdef CONFIG_PM /* * setup counter 0, since it keeps ticking after a @@ -265,12 +246,8 @@ void __init plat_time_init(void) * Check to ensure we really have a 32KHz oscillator before * we do this. */ - if (no_au1xxx_32khz) { + if (no_au1xxx_32khz) printk("WARNING: no 32KHz clock found.\n"); - - /* Ensure we get CPO_COUNTER interrupts. */ - set_c0_status(IE_IRQ5); - } else { while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C0S); au_writel(0, SYS_TOYWRITE); diff --git a/arch/mips/au1000/db1x00/board_setup.c b/arch/mips/au1000/db1x00/board_setup.c index 99eafeada51..b7dcbad5c58 100644 --- a/arch/mips/au1000/db1x00/board_setup.c +++ b/arch/mips/au1000/db1x00/board_setup.c @@ -27,20 +27,9 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/console.h> -#include <linux/mc146818rtc.h> -#include <linux/delay.h> - -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/irq.h> -#include <asm/mipsregs.h> -#include <asm/reboot.h> -#include <asm/pgtable.h> + #include <asm/mach-au1x00/au1000.h> #include <asm/mach-db1x00/db1x00.h> diff --git a/arch/mips/au1000/db1x00/init.c b/arch/mips/au1000/db1x00/init.c index e822c123eab..d3b967caf70 100644 --- a/arch/mips/au1000/db1x00/init.c +++ b/arch/mips/au1000/db1x00/init.c @@ -28,13 +28,8 @@ */ #include <linux/init.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/bootmem.h> -#include <linux/string.h> #include <linux/kernel.h> -#include <asm/addrspace.h> #include <asm/bootinfo.h> #include <prom.h> diff --git a/arch/mips/au1000/db1x00/irqmap.c b/arch/mips/au1000/db1x00/irqmap.c index 09cea03411b..eaa50c7b634 100644 --- a/arch/mips/au1000/db1x00/irqmap.c +++ b/arch/mips/au1000/db1x00/irqmap.c @@ -25,26 +25,9 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/errno.h> + #include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel_stat.h> -#include <linux/module.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/delay.h> -#include <linux/bitops.h> -#include <asm/bootinfo.h> -#include <asm/io.h> -#include <asm/mipsregs.h> -#include <asm/system.h> #include <asm/mach-au1x00/au1000.h> #ifdef CONFIG_MIPS_DB1500 diff --git a/arch/mips/au1000/mtx-1/board_setup.c b/arch/mips/au1000/mtx-1/board_setup.c index 310d5dff89f..5736354829c 100644 --- a/arch/mips/au1000/mtx-1/board_setup.c +++ b/arch/mips/au1000/mtx-1/board_setup.c @@ -28,19 +28,9 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/console.h> -#include <linux/delay.h> -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/irq.h> -#include <asm/mipsregs.h> -#include <asm/reboot.h> -#include <asm/pgtable.h> #include <asm/mach-au1x00/au1000.h> extern int (*board_pci_idsel)(unsigned int devsel, int assert); diff --git a/arch/mips/au1000/mtx-1/init.c b/arch/mips/au1000/mtx-1/init.c index e700fd312a2..c015cbce1cc 100644 --- a/arch/mips/au1000/mtx-1/init.c +++ b/arch/mips/au1000/mtx-1/init.c @@ -28,14 +28,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/string.h> + #include <linux/kernel.h> -#include <linux/sched.h> #include <linux/init.h> -#include <linux/mm.h> -#include <linux/bootmem.h> -#include <asm/addrspace.h> #include <asm/bootinfo.h> #include <prom.h> diff --git a/arch/mips/au1000/mtx-1/irqmap.c b/arch/mips/au1000/mtx-1/irqmap.c index 49c612aeddc..78d70c42c9d 100644 --- a/arch/mips/au1000/mtx-1/irqmap.c +++ b/arch/mips/au1000/mtx-1/irqmap.c @@ -25,26 +25,9 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/errno.h> + #include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel_stat.h> -#include <linux/module.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/delay.h> -#include <linux/bitops.h> -#include <asm/bootinfo.h> -#include <asm/io.h> -#include <asm/mipsregs.h> -#include <asm/system.h> #include <asm/mach-au1x00/au1000.h> char irq_tab_alchemy[][5] __initdata = { diff --git a/arch/mips/au1000/mtx-1/platform.c b/arch/mips/au1000/mtx-1/platform.c index ce8637b3afa..a7edbf0829a 100644 --- a/arch/mips/au1000/mtx-1/platform.c +++ b/arch/mips/au1000/mtx-1/platform.c @@ -19,7 +19,6 @@ */ #include <linux/init.h> -#include <linux/types.h> #include <linux/platform_device.h> #include <linux/leds.h> #include <linux/gpio_keys.h> diff --git a/arch/mips/au1000/pb1000/board_setup.c b/arch/mips/au1000/pb1000/board_setup.c index 5198c4f98b4..33f15acc1b1 100644 --- a/arch/mips/au1000/pb1000/board_setup.c +++ b/arch/mips/au1000/pb1000/board_setup.c @@ -23,19 +23,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/console.h> #include <linux/delay.h> -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/irq.h> -#include <asm/mipsregs.h> -#include <asm/reboot.h> -#include <asm/pgtable.h> #include <asm/mach-au1x00/au1000.h> #include <asm/mach-pb1x00/pb1000.h> diff --git a/arch/mips/au1000/pb1000/init.c b/arch/mips/au1000/pb1000/init.c index 2515b9fb24a..549447df71d 100644 --- a/arch/mips/au1000/pb1000/init.c +++ b/arch/mips/au1000/pb1000/init.c @@ -26,14 +26,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/bootmem.h> -#include <linux/string.h> #include <linux/kernel.h> -#include <asm/addrspace.h> #include <asm/bootinfo.h> #include <prom.h> diff --git a/arch/mips/au1000/pb1000/irqmap.c b/arch/mips/au1000/pb1000/irqmap.c index 88e35450820..b3d56b0af32 100644 --- a/arch/mips/au1000/pb1000/irqmap.c +++ b/arch/mips/au1000/pb1000/irqmap.c @@ -25,26 +25,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/errno.h> + #include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel_stat.h> -#include <linux/module.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/types.h> #include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/delay.h> -#include <linux/bitops.h> -#include <asm/bootinfo.h> -#include <asm/io.h> -#include <asm/mipsregs.h> -#include <asm/system.h> #include <asm/mach-au1x00/au1000.h> struct au1xxx_irqmap __initdata au1xxx_irq_map[] = { diff --git a/arch/mips/au1000/pb1100/board_setup.c b/arch/mips/au1000/pb1100/board_setup.c index 42874a6b31d..656164c8e9c 100644 --- a/arch/mips/au1000/pb1100/board_setup.c +++ b/arch/mips/au1000/pb1100/board_setup.c @@ -23,19 +23,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/console.h> #include <linux/delay.h> -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/irq.h> -#include <asm/mipsregs.h> -#include <asm/reboot.h> -#include <asm/pgtable.h> #include <asm/mach-au1x00/au1000.h> #include <asm/mach-pb1x00/pb1100.h> diff --git a/arch/mips/au1000/pb1100/init.c b/arch/mips/au1000/pb1100/init.c index 490c3801c27..c91344648ed 100644 --- a/arch/mips/au1000/pb1100/init.c +++ b/arch/mips/au1000/pb1100/init.c @@ -27,14 +27,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/bootmem.h> -#include <linux/string.h> #include <linux/kernel.h> -#include <asm/addrspace.h> #include <asm/bootinfo.h> #include <prom.h> diff --git a/arch/mips/au1000/pb1100/irqmap.c b/arch/mips/au1000/pb1100/irqmap.c index 880456bf8c1..b5021e3d477 100644 --- a/arch/mips/au1000/pb1100/irqmap.c +++ b/arch/mips/au1000/pb1100/irqmap.c @@ -25,26 +25,9 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/errno.h> + #include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel_stat.h> -#include <linux/module.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/delay.h> -#include <linux/bitops.h> -#include <asm/bootinfo.h> -#include <asm/io.h> -#include <asm/mipsregs.h> -#include <asm/system.h> #include <asm/mach-au1x00/au1000.h> struct au1xxx_irqmap __initdata au1xxx_irq_map[] = { diff --git a/arch/mips/au1000/pb1200/Makefile b/arch/mips/au1000/pb1200/Makefile index 970b1b1d5cd..4fe02ea65a6 100644 --- a/arch/mips/au1000/pb1200/Makefile +++ b/arch/mips/au1000/pb1200/Makefile @@ -3,5 +3,6 @@ # lib-y := init.o board_setup.o irqmap.o +obj-y += platform.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/au1000/pb1200/board_setup.c b/arch/mips/au1000/pb1200/board_setup.c index b98bebfa87c..4493a792cc4 100644 --- a/arch/mips/au1000/pb1200/board_setup.c +++ b/arch/mips/au1000/pb1200/board_setup.c @@ -23,27 +23,11 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> #include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/console.h> -#include <linux/mc146818rtc.h> -#include <linux/delay.h> - -#if defined(CONFIG_BLK_DEV_IDE_AU1XXX) -#include <linux/ide.h> -#endif - -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/irq.h> -#include <asm/mipsregs.h> -#include <asm/reboot.h> -#include <asm/pgtable.h> #include <au1000.h> -#include <au1xxx_dbdma.h> #include <prom.h> #ifdef CONFIG_MIPS_PB1200 @@ -52,8 +36,6 @@ #ifdef CONFIG_MIPS_DB1200 #include <asm/mach-db1x00/db1200.h> -#define PB1200_ETH_INT DB1200_ETH_INT -#define PB1200_IDE_INT DB1200_IDE_INT #endif extern void _board_init_irq(void); diff --git a/arch/mips/au1000/pb1200/init.c b/arch/mips/au1000/pb1200/init.c index 069ed45f04f..72af5500660 100644 --- a/arch/mips/au1000/pb1200/init.c +++ b/arch/mips/au1000/pb1200/init.c @@ -27,14 +27,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/bootmem.h> -#include <linux/string.h> #include <linux/kernel.h> -#include <asm/addrspace.h> #include <asm/bootinfo.h> #include <prom.h> diff --git a/arch/mips/au1000/pb1200/irqmap.c b/arch/mips/au1000/pb1200/irqmap.c index 8fcd0df86f9..e61eb8e0b76 100644 --- a/arch/mips/au1000/pb1200/irqmap.c +++ b/arch/mips/au1000/pb1200/irqmap.c @@ -22,26 +22,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/errno.h> + #include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel_stat.h> -#include <linux/module.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/types.h> #include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/delay.h> -#include <linux/bitops.h> - -#include <asm/bootinfo.h> -#include <asm/io.h> -#include <asm/mipsregs.h> -#include <asm/system.h> + #include <asm/mach-au1x00/au1000.h> #ifdef CONFIG_MIPS_PB1200 diff --git a/arch/mips/au1000/pb1200/platform.c b/arch/mips/au1000/pb1200/platform.c new file mode 100644 index 00000000000..5930110b9b6 --- /dev/null +++ b/arch/mips/au1000/pb1200/platform.c @@ -0,0 +1,84 @@ +/* + * Pb1200/DBAu1200 board platform device registration + * + * Copyright (C) 2008 MontaVista Software Inc. <source@mvista.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <linux/init.h> +#include <linux/platform_device.h> + +#include <asm/mach-au1x00/au1xxx.h> + +static struct resource ide_resources[] = { + [0] = { + .start = IDE_PHYS_ADDR, + .end = IDE_PHYS_ADDR + IDE_PHYS_LEN - 1, + .flags = IORESOURCE_MEM + }, + [1] = { + .start = IDE_INT, + .end = IDE_INT, + .flags = IORESOURCE_IRQ + } +}; + +static u64 ide_dmamask = ~(u32)0; + +static struct platform_device ide_device = { + .name = "au1200-ide", + .id = 0, + .dev = { + .dma_mask = &ide_dmamask, + .coherent_dma_mask = 0xffffffff, + }, + .num_resources = ARRAY_SIZE(ide_resources), + .resource = ide_resources +}; + +static struct resource smc91c111_resources[] = { + [0] = { + .name = "smc91x-regs", + .start = SMC91C111_PHYS_ADDR, + .end = SMC91C111_PHYS_ADDR + 0xf, + .flags = IORESOURCE_MEM + }, + [1] = { + .start = SMC91C111_INT, + .end = SMC91C111_INT, + .flags = IORESOURCE_IRQ + }, +}; + +static struct platform_device smc91c111_device = { + .name = "smc91x", + .id = -1, + .num_resources = ARRAY_SIZE(smc91c111_resources), + .resource = smc91c111_resources +}; + +static struct platform_device *board_platform_devices[] __initdata = { + &ide_device, + &smc91c111_device +}; + +static int __init board_register_devices(void) +{ + return platform_add_devices(board_platform_devices, + ARRAY_SIZE(board_platform_devices)); +} + +arch_initcall(board_register_devices); diff --git a/arch/mips/au1000/pb1500/board_setup.c b/arch/mips/au1000/pb1500/board_setup.c index 5446836869d..24c652e8ec4 100644 --- a/arch/mips/au1000/pb1500/board_setup.c +++ b/arch/mips/au1000/pb1500/board_setup.c @@ -23,19 +23,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/console.h> #include <linux/delay.h> -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/irq.h> -#include <asm/mipsregs.h> -#include <asm/reboot.h> -#include <asm/pgtable.h> #include <asm/mach-au1x00/au1000.h> #include <asm/mach-pb1x00/pb1500.h> diff --git a/arch/mips/au1000/pb1500/init.c b/arch/mips/au1000/pb1500/init.c index db558c96704..488507c07db 100644 --- a/arch/mips/au1000/pb1500/init.c +++ b/arch/mips/au1000/pb1500/init.c @@ -27,14 +27,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/bootmem.h> -#include <linux/string.h> #include <linux/kernel.h> -#include <asm/addrspace.h> #include <asm/bootinfo.h> #include <prom.h> diff --git a/arch/mips/au1000/pb1500/irqmap.c b/arch/mips/au1000/pb1500/irqmap.c index 810f695e24b..4817ab44d07 100644 --- a/arch/mips/au1000/pb1500/irqmap.c +++ b/arch/mips/au1000/pb1500/irqmap.c @@ -25,26 +25,9 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/errno.h> + #include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel_stat.h> -#include <linux/module.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/delay.h> -#include <linux/bitops.h> -#include <asm/bootinfo.h> -#include <asm/io.h> -#include <asm/mipsregs.h> -#include <asm/system.h> #include <asm/mach-au1x00/au1000.h> char irq_tab_alchemy[][5] __initdata = { diff --git a/arch/mips/au1000/pb1550/board_setup.c b/arch/mips/au1000/pb1550/board_setup.c index e3cfb0d7318..45d60872b56 100644 --- a/arch/mips/au1000/pb1550/board_setup.c +++ b/arch/mips/au1000/pb1550/board_setup.c @@ -27,20 +27,9 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/console.h> -#include <linux/mc146818rtc.h> -#include <linux/delay.h> -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/irq.h> -#include <asm/mipsregs.h> -#include <asm/reboot.h> -#include <asm/pgtable.h> #include <asm/mach-au1x00/au1000.h> #include <asm/mach-pb1x00/pb1550.h> diff --git a/arch/mips/au1000/pb1550/init.c b/arch/mips/au1000/pb1550/init.c index b716363ea56..f6b2fc58798 100644 --- a/arch/mips/au1000/pb1550/init.c +++ b/arch/mips/au1000/pb1550/init.c @@ -27,14 +27,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/bootmem.h> -#include <linux/string.h> #include <linux/kernel.h> -#include <asm/addrspace.h> #include <asm/bootinfo.h> #include <prom.h> diff --git a/arch/mips/au1000/pb1550/irqmap.c b/arch/mips/au1000/pb1550/irqmap.c index 56becab28e5..e1dac37af08 100644 --- a/arch/mips/au1000/pb1550/irqmap.c +++ b/arch/mips/au1000/pb1550/irqmap.c @@ -25,26 +25,9 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/errno.h> + #include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel_stat.h> -#include <linux/module.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/delay.h> -#include <linux/bitops.h> -#include <asm/bootinfo.h> -#include <asm/io.h> -#include <asm/mipsregs.h> -#include <asm/system.h> #include <asm/mach-au1x00/au1000.h> char irq_tab_alchemy[][5] __initdata = { diff --git a/arch/mips/au1000/xxs1500/board_setup.c b/arch/mips/au1000/xxs1500/board_setup.c index b2e413e597a..79d1798621b 100644 --- a/arch/mips/au1000/xxs1500/board_setup.c +++ b/arch/mips/au1000/xxs1500/board_setup.c @@ -23,19 +23,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/console.h> #include <linux/delay.h> -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/irq.h> -#include <asm/mipsregs.h> -#include <asm/reboot.h> -#include <asm/pgtable.h> #include <asm/mach-au1x00/au1000.h> void board_reset(void) diff --git a/arch/mips/au1000/xxs1500/init.c b/arch/mips/au1000/xxs1500/init.c index 7e6878c1b0a..24fc6e132dc 100644 --- a/arch/mips/au1000/xxs1500/init.c +++ b/arch/mips/au1000/xxs1500/init.c @@ -26,14 +26,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <linux/init.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/bootmem.h> -#include <linux/string.h> #include <linux/kernel.h> -#include <asm/addrspace.h> #include <asm/bootinfo.h> #include <prom.h> diff --git a/arch/mips/au1000/xxs1500/irqmap.c b/arch/mips/au1000/xxs1500/irqmap.c index a343da13433..dd6e3d1eb4d 100644 --- a/arch/mips/au1000/xxs1500/irqmap.c +++ b/arch/mips/au1000/xxs1500/irqmap.c @@ -25,26 +25,9 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/errno.h> + #include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel_stat.h> -#include <linux/module.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/delay.h> -#include <linux/bitops.h> -#include <asm/bootinfo.h> -#include <asm/io.h> -#include <asm/mipsregs.h> -#include <asm/system.h> #include <asm/mach-au1x00/au1000.h> struct au1xxx_irqmap __initdata au1xxx_irq_map[] = { diff --git a/arch/mips/configs/mipssim_defconfig b/arch/mips/configs/mipssim_defconfig index 6db0bdaefb2..4f6bce99d5c 100644 --- a/arch/mips/configs/mipssim_defconfig +++ b/arch/mips/configs/mipssim_defconfig @@ -641,7 +641,6 @@ CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="nfsroot=192.168.192.169:/u1/mipsel,timeo=20 ip=dhcp" # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_RUNTIME_DEBUG is not set -# CONFIG_MIPS_UNCACHED is not set # # Security options diff --git a/arch/mips/configs/pnx8550-jbs_defconfig b/arch/mips/configs/pnx8550-jbs_defconfig index 518a60892b7..780c7fc24b8 100644 --- a/arch/mips/configs/pnx8550-jbs_defconfig +++ b/arch/mips/configs/pnx8550-jbs_defconfig @@ -1223,7 +1223,6 @@ CONFIG_CMDLINE="console=ttyS1,38400n8 kgdb=ttyS0 root=/dev/nfs ip=bootp" # CONFIG_KGDB is not set CONFIG_SYS_SUPPORTS_KGDB=y # CONFIG_RUNTIME_DEBUG is not set -# CONFIG_MIPS_UNCACHED is not set # # Security options diff --git a/arch/mips/configs/pnx8550-stb810_defconfig b/arch/mips/configs/pnx8550-stb810_defconfig index 68351eb81bc..267f21ed1d0 100644 --- a/arch/mips/configs/pnx8550-stb810_defconfig +++ b/arch/mips/configs/pnx8550-stb810_defconfig @@ -1213,7 +1213,6 @@ CONFIG_CMDLINE="console=ttyS1,38400n8 kgdb=ttyS0 root=/dev/nfs ip=bootp" # CONFIG_KGDB is not set CONFIG_SYS_SUPPORTS_KGDB=y # CONFIG_RUNTIME_DEBUG is not set -# CONFIG_MIPS_UNCACHED is not set # # Security options diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c index 60349062595..3965fda94a8 100644 --- a/arch/mips/dec/time.c +++ b/arch/mips/dec/time.c @@ -9,30 +9,15 @@ * */ #include <linux/bcd.h> -#include <linux/errno.h> #include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/kernel.h> #include <linux/mc146818rtc.h> -#include <linux/mm.h> -#include <linux/module.h> #include <linux/param.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/time.h> -#include <linux/types.h> - -#include <asm/bootinfo.h> -#include <asm/cpu.h> -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/mipsregs.h> -#include <asm/sections.h> -#include <asm/time.h> +#include <asm/cpu-features.h> +#include <asm/ds1287.h> +#include <asm/time.h> #include <asm/dec/interrupts.h> #include <asm/dec/ioasic.h> -#include <asm/dec/ioasic_addrs.h> #include <asm/dec/machtype.h> unsigned long read_persistent_clock(void) @@ -139,42 +124,32 @@ int rtc_mips_set_mmss(unsigned long nowtime) return retval; } -static int dec_timer_state(void) +void __init plat_time_init(void) { - return (CMOS_READ(RTC_REG_C) & RTC_PF) != 0; -} + u32 start, end; + int i = HZ / 10; -static void dec_timer_ack(void) -{ - CMOS_READ(RTC_REG_C); /* Ack the RTC interrupt. */ -} - -static cycle_t dec_ioasic_hpt_read(void) -{ - /* - * The free-running counter is 32-bit which is good for about - * 2 minutes, 50 seconds at possible count rates of up to 25MHz. - */ - return ioasic_read(IO_REG_FCTR); -} + /* Set up the rate of periodic DS1287 interrupts. */ + ds1287_set_base_clock(HZ); + if (cpu_has_counter) { + while (!ds1287_timer_state()) + ; -void __init plat_time_init(void) -{ - mips_timer_ack = dec_timer_ack; + start = read_c0_count(); - if (!cpu_has_counter && IOASIC) - /* For pre-R4k systems we use the I/O ASIC's counter. */ - clocksource_mips.read = dec_ioasic_hpt_read; + while (i--) + while (!ds1287_timer_state()) + ; - /* Set up the rate of periodic DS1287 interrupts. */ - CMOS_WRITE(RTC_REF_CLCK_32KHZ | (16 - __ffs(HZ)), RTC_REG_A); -} + end = read_c0_count(); -void __init plat_timer_setup(struct irqaction *irq) -{ - setup_irq(dec_interrupt[DEC_IRQ_RTC], irq); + mips_hpt_frequency = (end - start) * 10; + printk(KERN_INFO "MIPS counter frequency %dHz\n", + mips_hpt_frequency); + } else if (IOASIC) + /* For pre-R4k systems we use the I/O ASIC's counter. */ + dec_ioasic_clocksource_init(); - /* Enable periodic DS1287 interrupts. */ - CMOS_WRITE(CMOS_READ(RTC_REG_B) | RTC_PIE, RTC_REG_B); + ds1287_clockevent_init(dec_interrupt[DEC_IRQ_RTC]); } diff --git a/arch/mips/jmr3927/rbhma3100/setup.c b/arch/mips/jmr3927/rbhma3100/setup.c index c886d804d30..f39c444e42d 100644 --- a/arch/mips/jmr3927/rbhma3100/setup.c +++ b/arch/mips/jmr3927/rbhma3100/setup.c @@ -36,11 +36,13 @@ #include <linux/pm.h> #include <linux/platform_device.h> #include <linux/clk.h> +#include <linux/gpio.h> #ifdef CONFIG_SERIAL_TXX9 #include <linux/serial_core.h> #endif #include <asm/txx9tmr.h> +#include <asm/txx9pio.h> #include <asm/reboot.h> #include <asm/jmr3927/jmr3927.h> #include <asm/mipsregs.h> @@ -340,9 +342,12 @@ static void __init tx3927_setup(void) /* PIO */ /* PIO[15:12] connected to LEDs */ - tx3927_pioptr->dir = 0x0000f000; - tx3927_pioptr->maskcpu = 0; - tx3927_pioptr->maskext = 0; + __raw_writel(0x0000f000, &tx3927_pioptr->dir); + __raw_writel(0, &tx3927_pioptr->maskcpu); + __raw_writel(0, &tx3927_pioptr->maskext); + txx9_gpio_init(TX3927_PIO_REG, 0, 16); + gpio_request(11, "dipsw1"); + gpio_request(10, "dipsw2"); { unsigned int conf; diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 6fcdb6fda2e..45545be3eb8 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -10,12 +10,15 @@ obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \ obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o obj-$(CONFIG_CEVT_R4K) += cevt-r4k.o +obj-$(CONFIG_CEVT_DS1287) += cevt-ds1287.o obj-$(CONFIG_CEVT_GT641XX) += cevt-gt641xx.o obj-$(CONFIG_CEVT_SB1250) += cevt-sb1250.o obj-$(CONFIG_CEVT_TXX9) += cevt-txx9.o obj-$(CONFIG_CSRC_BCM1480) += csrc-bcm1480.o +obj-$(CONFIG_CSRC_IOASIC) += csrc-ioasic.o obj-$(CONFIG_CSRC_R4K) += csrc-r4k.o obj-$(CONFIG_CSRC_SB1250) += csrc-sb1250.o +obj-$(CONFIG_SYNC_R4K) += sync-r4k.o binfmt_irix-objs := irixelf.o irixinv.o irixioctl.o irixsig.o \ irix5sys.o sysirix.o @@ -50,6 +53,8 @@ obj-$(CONFIG_MIPS_MT) += mips-mt.o obj-$(CONFIG_MIPS_MT_FPAFF) += mips-mt-fpaff.o obj-$(CONFIG_MIPS_MT_SMTC) += smtc.o smtc-asm.o smtc-proc.o obj-$(CONFIG_MIPS_MT_SMP) += smp-mt.o +obj-$(CONFIG_MIPS_CMP) += smp-cmp.o +obj-$(CONFIG_CPU_MIPSR2) += spram.o obj-$(CONFIG_MIPS_APSP_KSPD) += kspd.o obj-$(CONFIG_MIPS_VPE_LOADER) += vpe.o @@ -62,6 +67,7 @@ obj-$(CONFIG_IRQ_CPU_RM9K) += irq-rm9000.o obj-$(CONFIG_MIPS_BOARDS_GEN) += irq-msc01.o obj-$(CONFIG_IRQ_TXX9) += irq_txx9.o obj-$(CONFIG_IRQ_GT641XX) += irq-gt641xx.o +obj-$(CONFIG_IRQ_GIC) += irq-gic.o obj-$(CONFIG_32BIT) += scall32-o32.o obj-$(CONFIG_64BIT) += scall64-64.o @@ -77,6 +83,8 @@ obj-$(CONFIG_64BIT) += cpu-bugs64.o obj-$(CONFIG_I8253) += i8253.o +obj-$(CONFIG_GPIO_TXX9) += gpio_txx9.o + obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index ca136298acd..5bf03b3c415 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -17,252 +17,252 @@ #include <asm/ptrace.h> #include <asm/processor.h> -#define text(t) __asm__("\n@@@" t) +#define text(t) __asm__("\n->#" t) #define _offset(type, member) (&(((type *)NULL)->member)) #define offset(string, ptr, member) \ - __asm__("\n@@@" string "%0" : : "i" (_offset(ptr, member))) + __asm__("\n->" string " %0" : : "i" (_offset(ptr, member))) #define constant(string, member) \ - __asm__("\n@@@" string "%X0" : : "ri" (member)) + __asm__("\n->" string " %0" : : "ri" (member)) #define size(string, size) \ - __asm__("\n@@@" string "%0" : : "i" (sizeof(size))) + __asm__("\n->" string " %0" : : "i" (sizeof(size))) #define linefeed text("") void output_ptreg_defines(void) { - text("/* MIPS pt_regs offsets. */"); - offset("#define PT_R0 ", struct pt_regs, regs[0]); - offset("#define PT_R1 ", struct pt_regs, regs[1]); - offset("#define PT_R2 ", struct pt_regs, regs[2]); - offset("#define PT_R3 ", struct pt_regs, regs[3]); - offset("#define PT_R4 ", struct pt_regs, regs[4]); - offset("#define PT_R5 ", struct pt_regs, regs[5]); - offset("#define PT_R6 ", struct pt_regs, regs[6]); - offset("#define PT_R7 ", struct pt_regs, regs[7]); - offset("#define PT_R8 ", struct pt_regs, regs[8]); - offset("#define PT_R9 ", struct pt_regs, regs[9]); - offset("#define PT_R10 ", struct pt_regs, regs[10]); - offset("#define PT_R11 ", struct pt_regs, regs[11]); - offset("#define PT_R12 ", struct pt_regs, regs[12]); - offset("#define PT_R13 ", struct pt_regs, regs[13]); - offset("#define PT_R14 ", struct pt_regs, regs[14]); - offset("#define PT_R15 ", struct pt_regs, regs[15]); - offset("#define PT_R16 ", struct pt_regs, regs[16]); - offset("#define PT_R17 ", struct pt_regs, regs[17]); - offset("#define PT_R18 ", struct pt_regs, regs[18]); - offset("#define PT_R19 ", struct pt_regs, regs[19]); - offset("#define PT_R20 ", struct pt_regs, regs[20]); - offset("#define PT_R21 ", struct pt_regs, regs[21]); - offset("#define PT_R22 ", struct pt_regs, regs[22]); - offset("#define PT_R23 ", struct pt_regs, regs[23]); - offset("#define PT_R24 ", struct pt_regs, regs[24]); - offset("#define PT_R25 ", struct pt_regs, regs[25]); - offset("#define PT_R26 ", struct pt_regs, regs[26]); - offset("#define PT_R27 ", struct pt_regs, regs[27]); - offset("#define PT_R28 ", struct pt_regs, regs[28]); - offset("#define PT_R29 ", struct pt_regs, regs[29]); - offset("#define PT_R30 ", struct pt_regs, regs[30]); - offset("#define PT_R31 ", struct pt_regs, regs[31]); - offset("#define PT_LO ", struct pt_regs, lo); - offset("#define PT_HI ", struct pt_regs, hi); + text("MIPS pt_regs offsets."); + offset("PT_R0", struct pt_regs, regs[0]); + offset("PT_R1", struct pt_regs, regs[1]); + offset("PT_R2", struct pt_regs, regs[2]); + offset("PT_R3", struct pt_regs, regs[3]); + offset("PT_R4", struct pt_regs, regs[4]); + offset("PT_R5", struct pt_regs, regs[5]); + offset("PT_R6", struct pt_regs, regs[6]); + offset("PT_R7", struct pt_regs, regs[7]); + offset("PT_R8", struct pt_regs, regs[8]); + offset("PT_R9", struct pt_regs, regs[9]); + offset("PT_R10", struct pt_regs, regs[10]); + offset("PT_R11", struct pt_regs, regs[11]); + offset("PT_R12", struct pt_regs, regs[12]); + offset("PT_R13", struct pt_regs, regs[13]); + offset("PT_R14", struct pt_regs, regs[14]); + offset("PT_R15", struct pt_regs, regs[15]); + offset("PT_R16", struct pt_regs, regs[16]); + offset("PT_R17", struct pt_regs, regs[17]); + offset("PT_R18", struct pt_regs, regs[18]); + offset("PT_R19", struct pt_regs, regs[19]); + offset("PT_R20", struct pt_regs, regs[20]); + offset("PT_R21", struct pt_regs, regs[21]); + offset("PT_R22", struct pt_regs, regs[22]); + offset("PT_R23", struct pt_regs, regs[23]); + offset("PT_R24", struct pt_regs, regs[24]); + offset("PT_R25", struct pt_regs, regs[25]); + offset("PT_R26", struct pt_regs, regs[26]); + offset("PT_R27", struct pt_regs, regs[27]); + offset("PT_R28", struct pt_regs, regs[28]); + offset("PT_R29", struct pt_regs, regs[29]); + offset("PT_R30", struct pt_regs, regs[30]); + offset("PT_R31", struct pt_regs, regs[31]); + offset("PT_LO", struct pt_regs, lo); + offset("PT_HI", struct pt_regs, hi); #ifdef CONFIG_CPU_HAS_SMARTMIPS - offset("#define PT_ACX ", struct pt_regs, acx); + offset("PT_ACX", struct pt_regs, acx); #endif - offset("#define PT_EPC ", struct pt_regs, cp0_epc); - offset("#define PT_BVADDR ", struct pt_regs, cp0_badvaddr); - offset("#define PT_STATUS ", struct pt_regs, cp0_status); - offset("#define PT_CAUSE ", struct pt_regs, cp0_cause); + offset("PT_EPC", struct pt_regs, cp0_epc); + offset("PT_BVADDR", struct pt_regs, cp0_badvaddr); + offset("PT_STATUS", struct pt_regs, cp0_status); + offset("PT_CAUSE", struct pt_regs, cp0_cause); #ifdef CONFIG_MIPS_MT_SMTC - offset("#define PT_TCSTATUS ", struct pt_regs, cp0_tcstatus); + offset("PT_TCSTATUS", struct pt_regs, cp0_tcstatus); #endif /* CONFIG_MIPS_MT_SMTC */ - size("#define PT_SIZE ", struct pt_regs); + size("PT_SIZE", struct pt_regs); linefeed; } void output_task_defines(void) { - text("/* MIPS task_struct offsets. */"); - offset("#define TASK_STATE ", struct task_struct, state); - offset("#define TASK_THREAD_INFO ", struct task_struct, stack); - offset("#define TASK_FLAGS ", struct task_struct, flags); - offset("#define TASK_MM ", struct task_struct, mm); - offset("#define TASK_PID ", struct task_struct, pid); - size( "#define TASK_STRUCT_SIZE ", struct task_struct); + text("MIPS task_struct offsets."); + offset("TASK_STATE", struct task_struct, state); + offset("TASK_THREAD_INFO", struct task_struct, stack); + offset("TASK_FLAGS", struct task_struct, flags); + offset("TASK_MM", struct task_struct, mm); + offset("TASK_PID", struct task_struct, pid); + size( "TASK_STRUCT_SIZE", struct task_struct); linefeed; } void output_thread_info_defines(void) { - text("/* MIPS thread_info offsets. */"); - offset("#define TI_TASK ", struct thread_info, task); - offset("#define TI_EXEC_DOMAIN ", struct thread_info, exec_domain); - offset("#define TI_FLAGS ", struct thread_info, flags); - offset("#define TI_TP_VALUE ", struct thread_info, tp_value); - offset("#define TI_CPU ", struct thread_info, cpu); - offset("#define TI_PRE_COUNT ", struct thread_info, preempt_count); - offset("#define TI_ADDR_LIMIT ", struct thread_info, addr_limit); - offset("#define TI_RESTART_BLOCK ", struct thread_info, restart_block); - offset("#define TI_REGS ", struct thread_info, regs); - constant("#define _THREAD_SIZE ", THREAD_SIZE); - constant("#define _THREAD_MASK ", THREAD_MASK); + text("MIPS thread_info offsets."); + offset("TI_TASK", struct thread_info, task); + offset("TI_EXEC_DOMAIN", struct thread_info, exec_domain); + offset("TI_FLAGS", struct thread_info, flags); + offset("TI_TP_VALUE", struct thread_info, tp_value); + offset("TI_CPU", struct thread_info, cpu); + offset("TI_PRE_COUNT", struct thread_info, preempt_count); + offset("TI_ADDR_LIMIT", struct thread_info, addr_limit); + offset("TI_RESTART_BLOCK", struct thread_info, restart_block); + offset("TI_REGS", struct thread_info, regs); + constant("_THREAD_SIZE", THREAD_SIZE); + constant("_THREAD_MASK", THREAD_MASK); linefeed; } void output_thread_defines(void) { - text("/* MIPS specific thread_struct offsets. */"); - offset("#define THREAD_REG16 ", struct task_struct, thread.reg16); - offset("#define THREAD_REG17 ", struct task_struct, thread.reg17); - offset("#define THREAD_REG18 ", struct task_struct, thread.reg18); - offset("#define THREAD_REG19 ", struct task_struct, thread.reg19); - offset("#define THREAD_REG20 ", struct task_struct, thread.reg20); - offset("#define THREAD_REG21 ", struct task_struct, thread.reg21); - offset("#define THREAD_REG22 ", struct task_struct, thread.reg22); - offset("#define THREAD_REG23 ", struct task_struct, thread.reg23); - offset("#define THREAD_REG29 ", struct task_struct, thread.reg29); - offset("#define THREAD_REG30 ", struct task_struct, thread.reg30); - offset("#define THREAD_REG31 ", struct task_struct, thread.reg31); - offset("#define THREAD_STATUS ", struct task_struct, + text("MIPS specific thread_struct offsets."); + offset("THREAD_REG16", struct task_struct, thread.reg16); + offset("THREAD_REG17", struct task_struct, thread.reg17); + offset("THREAD_REG18", struct task_struct, thread.reg18); + offset("THREAD_REG19", struct task_struct, thread.reg19); + offset("THREAD_REG20", struct task_struct, thread.reg20); + offset("THREAD_REG21", struct task_struct, thread.reg21); + offset("THREAD_REG22", struct task_struct, thread.reg22); + offset("THREAD_REG23", struct task_struct, thread.reg23); + offset("THREAD_REG29", struct task_struct, thread.reg29); + offset("THREAD_REG30", struct task_struct, thread.reg30); + offset("THREAD_REG31", struct task_struct, thread.reg31); + offset("THREAD_STATUS", struct task_struct, thread.cp0_status); - offset("#define THREAD_FPU ", struct task_struct, thread.fpu); + offset("THREAD_FPU", struct task_struct, thread.fpu); - offset("#define THREAD_BVADDR ", struct task_struct, \ + offset("THREAD_BVADDR", struct task_struct, \ thread.cp0_badvaddr); - offset("#define THREAD_BUADDR ", struct task_struct, \ + offset("THREAD_BUADDR", struct task_struct, \ thread.cp0_baduaddr); - offset("#define THREAD_ECODE ", struct task_struct, \ + offset("THREAD_ECODE", struct task_struct, \ thread.error_code); - offset("#define THREAD_TRAPNO ", struct task_struct, thread.trap_no); - offset("#define THREAD_TRAMP ", struct task_struct, \ + offset("THREAD_TRAPNO", struct task_struct, thread.trap_no); + offset("THREAD_TRAMP", struct task_struct, \ thread.irix_trampoline); - offset("#define THREAD_OLDCTX ", struct task_struct, \ + offset("THREAD_OLDCTX", struct task_struct, \ thread.irix_oldctx); linefeed; } void output_thread_fpu_defines(void) { - offset("#define THREAD_FPR0 ", + offset("THREAD_FPR0", struct task_struct, thread.fpu.fpr[0]); - offset("#define THREAD_FPR1 ", + offset("THREAD_FPR1", struct task_struct, thread.fpu.fpr[1]); - offset("#define THREAD_FPR2 ", + offset("THREAD_FPR2", struct task_struct, thread.fpu.fpr[2]); - offset("#define THREAD_FPR3 ", + offset("THREAD_FPR3", struct task_struct, thread.fpu.fpr[3]); - offset("#define THREAD_FPR4 ", + offset("THREAD_FPR4", struct task_struct, thread.fpu.fpr[4]); - offset("#define THREAD_FPR5 ", + offset("THREAD_FPR5", struct task_struct, thread.fpu.fpr[5]); - offset("#define THREAD_FPR6 ", + offset("THREAD_FPR6", struct task_struct, thread.fpu.fpr[6]); - offset("#define THREAD_FPR7 ", + offset("THREAD_FPR7", struct task_struct, thread.fpu.fpr[7]); - offset("#define THREAD_FPR8 ", + offset("THREAD_FPR8", struct task_struct, thread.fpu.fpr[8]); - offset("#define THREAD_FPR9 ", + offset("THREAD_FPR9", struct task_struct, thread.fpu.fpr[9]); - offset("#define THREAD_FPR10 ", + offset("THREAD_FPR10", struct task_struct, thread.fpu.fpr[10]); - offset("#define THREAD_FPR11 ", + offset("THREAD_FPR11", struct task_struct, thread.fpu.fpr[11]); - offset("#define THREAD_FPR12 ", + offset("THREAD_FPR12", struct task_struct, thread.fpu.fpr[12]); - offset("#define THREAD_FPR13 ", + offset("THREAD_FPR13", struct task_struct, thread.fpu.fpr[13]); - offset("#define THREAD_FPR14 ", + offset("THREAD_FPR14", struct task_struct, thread.fpu.fpr[14]); - offset("#define THREAD_FPR15 ", + offset("THREAD_FPR15", struct task_struct, thread.fpu.fpr[15]); - offset("#define THREAD_FPR16 ", + offset("THREAD_FPR16", struct task_struct, thread.fpu.fpr[16]); - offset("#define THREAD_FPR17 ", + offset("THREAD_FPR17", struct task_struct, thread.fpu.fpr[17]); - offset("#define THREAD_FPR18 ", + offset("THREAD_FPR18", struct task_struct, thread.fpu.fpr[18]); - offset("#define THREAD_FPR19 ", + offset("THREAD_FPR19", struct task_struct, thread.fpu.fpr[19]); - offset("#define THREAD_FPR20 ", + offset("THREAD_FPR20", struct task_struct, thread.fpu.fpr[20]); - offset("#define THREAD_FPR21 ", + offset("THREAD_FPR21", struct task_struct, thread.fpu.fpr[21]); - offset("#define THREAD_FPR22 ", + offset("THREAD_FPR22", struct task_struct, thread.fpu.fpr[22]); - offset("#define THREAD_FPR23 ", + offset("THREAD_FPR23", struct task_struct, thread.fpu.fpr[23]); - offset("#define THREAD_FPR24 ", + offset("THREAD_FPR24", struct task_struct, thread.fpu.fpr[24]); - offset("#define THREAD_FPR25 ", + offset("THREAD_FPR25", struct task_struct, thread.fpu.fpr[25]); - offset("#define THREAD_FPR26 ", + offset("THREAD_FPR26", struct task_struct, thread.fpu.fpr[26]); - offset("#define THREAD_FPR27 ", + offset("THREAD_FPR27", struct task_struct, thread.fpu.fpr[27]); - offset("#define THREAD_FPR28 ", + offset("THREAD_FPR28", struct task_struct, thread.fpu.fpr[28]); - offset("#define THREAD_FPR29 ", + offset("THREAD_FPR29", struct task_struct, thread.fpu.fpr[29]); - offset("#define THREAD_FPR30 ", + offset("THREAD_FPR30", struct task_struct, thread.fpu.fpr[30]); - offset("#define THREAD_FPR31 ", + offset("THREAD_FPR31", struct task_struct, thread.fpu.fpr[31]); - offset("#define THREAD_FCR31 ", + offset("THREAD_FCR31", struct task_struct, thread.fpu.fcr31); linefeed; } void output_mm_defines(void) { - text("/* Size of struct page */"); - size("#define STRUCT_PAGE_SIZE ", struct page); + text("Size of struct page"); + size("STRUCT_PAGE_SIZE", struct page); linefeed; - text("/* Linux mm_struct offsets. */"); - offset("#define MM_USERS ", struct mm_struct, mm_users); - offset("#define MM_PGD ", struct mm_struct, pgd); - offset("#define MM_CONTEXT ", struct mm_struct, context); + text("Linux mm_struct offsets."); + offset("MM_USERS", struct mm_struct, mm_users); + offset("MM_PGD", struct mm_struct, pgd); + offset("MM_CONTEXT", struct mm_struct, context); linefeed; - constant("#define _PAGE_SIZE ", PAGE_SIZE); - constant("#define _PAGE_SHIFT ", PAGE_SHIFT); + constant("_PAGE_SIZE", PAGE_SIZE); + constant("_PAGE_SHIFT", PAGE_SHIFT); linefeed; - constant("#define _PGD_T_SIZE ", sizeof(pgd_t)); - constant("#define _PMD_T_SIZE ", sizeof(pmd_t)); - constant("#define _PTE_T_SIZE ", sizeof(pte_t)); + constant("_PGD_T_SIZE", sizeof(pgd_t)); + constant("_PMD_T_SIZE", sizeof(pmd_t)); + constant("_PTE_T_SIZE", sizeof(pte_t)); linefeed; - constant("#define _PGD_T_LOG2 ", PGD_T_LOG2); - constant("#define _PMD_T_LOG2 ", PMD_T_LOG2); - constant("#define _PTE_T_LOG2 ", PTE_T_LOG2); + constant("_PGD_T_LOG2", PGD_T_LOG2); + constant("_PMD_T_LOG2", PMD_T_LOG2); + constant("_PTE_T_LOG2", PTE_T_LOG2); linefeed; - constant("#define _PGD_ORDER ", PGD_ORDER); - constant("#define _PMD_ORDER ", PMD_ORDER); - constant("#define _PTE_ORDER ", PTE_ORDER); + constant("_PGD_ORDER", PGD_ORDER); + constant("_PMD_ORDER", PMD_ORDER); + constant("_PTE_ORDER", PTE_ORDER); linefeed; - constant("#define _PMD_SHIFT ", PMD_SHIFT); - constant("#define _PGDIR_SHIFT ", PGDIR_SHIFT); + constant("_PMD_SHIFT", PMD_SHIFT); + constant("_PGDIR_SHIFT", PGDIR_SHIFT); linefeed; - constant("#define _PTRS_PER_PGD ", PTRS_PER_PGD); - constant("#define _PTRS_PER_PMD ", PTRS_PER_PMD); - constant("#define _PTRS_PER_PTE ", PTRS_PER_PTE); + constant("_PTRS_PER_PGD", PTRS_PER_PGD); + constant("_PTRS_PER_PMD", PTRS_PER_PMD); + constant("_PTRS_PER_PTE", PTRS_PER_PTE); linefeed; } #ifdef CONFIG_32BIT void output_sc_defines(void) { - text("/* Linux sigcontext offsets. */"); - offset("#define SC_REGS ", struct sigcontext, sc_regs); - offset("#define SC_FPREGS ", struct sigcontext, sc_fpregs); - offset("#define SC_ACX ", struct sigcontext, sc_acx); - offset("#define SC_MDHI ", struct sigcontext, sc_mdhi); - offset("#define SC_MDLO ", struct sigcontext, sc_mdlo); - offset("#define SC_PC ", struct sigcontext, sc_pc); - offset("#define SC_FPC_CSR ", struct sigcontext, sc_fpc_csr); - offset("#define SC_FPC_EIR ", struct sigcontext, sc_fpc_eir); - offset("#define SC_HI1 ", struct sigcontext, sc_hi1); - offset("#define SC_LO1 ", struct sigcontext, sc_lo1); - offset("#define SC_HI2 ", struct sigcontext, sc_hi2); - offset("#define SC_LO2 ", struct sigcontext, sc_lo2); - offset("#define SC_HI3 ", struct sigcontext, sc_hi3); - offset("#define SC_LO3 ", struct sigcontext, sc_lo3); + text("Linux sigcontext offsets."); + offset("SC_REGS", struct sigcontext, sc_regs); + offset("SC_FPREGS", struct sigcontext, sc_fpregs); + offset("SC_ACX", struct sigcontext, sc_acx); + offset("SC_MDHI", struct sigcontext, sc_mdhi); + offset("SC_MDLO", struct sigcontext, sc_mdlo); + offset("SC_PC", struct sigcontext, sc_pc); + offset("SC_FPC_CSR", struct sigcontext, sc_fpc_csr); + offset("SC_FPC_EIR", struct sigcontext, sc_fpc_eir); + offset("SC_HI1", struct sigcontext, sc_hi1); + offset("SC_LO1", struct sigcontext, sc_lo1); + offset("SC_HI2", struct sigcontext, sc_hi2); + offset("SC_LO2", struct sigcontext, sc_lo2); + offset("SC_HI3", struct sigcontext, sc_hi3); + offset("SC_LO3", struct sigcontext, sc_lo3); linefeed; } #endif @@ -270,13 +270,13 @@ void output_sc_defines(void) #ifdef CONFIG_64BIT void output_sc_defines(void) { - text("/* Linux sigcontext offsets. */"); - offset("#define SC_REGS ", struct sigcontext, sc_regs); - offset("#define SC_FPREGS ", struct sigcontext, sc_fpregs); - offset("#define SC_MDHI ", struct sigcontext, sc_mdhi); - offset("#define SC_MDLO ", struct sigcontext, sc_mdlo); - offset("#define SC_PC ", struct sigcontext, sc_pc); - offset("#define SC_FPC_CSR ", struct sigcontext, sc_fpc_csr); + text("Linux sigcontext offsets."); + offset("SC_REGS", struct sigcontext, sc_regs); + offset("SC_FPREGS", struct sigcontext, sc_fpregs); + offset("SC_MDHI", struct sigcontext, sc_mdhi); + offset("SC_MDLO", struct sigcontext, sc_mdlo); + offset("SC_PC", struct sigcontext, sc_pc); + offset("SC_FPC_CSR", struct sigcontext, sc_fpc_csr); linefeed; } #endif @@ -284,56 +284,56 @@ void output_sc_defines(void) #ifdef CONFIG_MIPS32_COMPAT void output_sc32_defines(void) { - text("/* Linux 32-bit sigcontext offsets. */"); - offset("#define SC32_FPREGS ", struct sigcontext32, sc_fpregs); - offset("#define SC32_FPC_CSR ", struct sigcontext32, sc_fpc_csr); - offset("#define SC32_FPC_EIR ", struct sigcontext32, sc_fpc_eir); + text("Linux 32-bit sigcontext offsets."); + offset("SC32_FPREGS", struct sigcontext32, sc_fpregs); + offset("SC32_FPC_CSR", struct sigcontext32, sc_fpc_csr); + offset("SC32_FPC_EIR", struct sigcontext32, sc_fpc_eir); linefeed; } #endif void output_signal_defined(void) { - text("/* Linux signal numbers. */"); - constant("#define _SIGHUP ", SIGHUP); - constant("#define _SIGINT ", SIGINT); - constant("#define _SIGQUIT ", SIGQUIT); - constant("#define _SIGILL ", SIGILL); - constant("#define _SIGTRAP ", SIGTRAP); - constant("#define _SIGIOT ", SIGIOT); - constant("#define _SIGABRT ", SIGABRT); - constant("#define _SIGEMT ", SIGEMT); - constant("#define _SIGFPE ", SIGFPE); - constant("#define _SIGKILL ", SIGKILL); - constant("#define _SIGBUS ", SIGBUS); - constant("#define _SIGSEGV ", SIGSEGV); - constant("#define _SIGSYS ", SIGSYS); - constant("#define _SIGPIPE ", SIGPIPE); - constant("#define _SIGALRM ", SIGALRM); - constant("#define _SIGTERM ", SIGTERM); - constant("#define _SIGUSR1 ", SIGUSR1); - constant("#define _SIGUSR2 ", SIGUSR2); - constant("#define _SIGCHLD ", SIGCHLD); - constant("#define _SIGPWR ", SIGPWR); - constant("#define _SIGWINCH ", SIGWINCH); - constant("#define _SIGURG ", SIGURG); - constant("#define _SIGIO ", SIGIO); - constant("#define _SIGSTOP ", SIGSTOP); - constant("#define _SIGTSTP ", SIGTSTP); - constant("#define _SIGCONT ", SIGCONT); - constant("#define _SIGTTIN ", SIGTTIN); - constant("#define _SIGTTOU ", SIGTTOU); - constant("#define _SIGVTALRM ", SIGVTALRM); - constant("#define _SIGPROF ", SIGPROF); - constant("#define _SIGXCPU ", SIGXCPU); - constant("#define _SIGXFSZ ", SIGXFSZ); + text("Linux signal numbers."); + constant("_SIGHUP", SIGHUP); + constant("_SIGINT", SIGINT); + constant("_SIGQUIT", SIGQUIT); + constant("_SIGILL", SIGILL); + constant("_SIGTRAP", SIGTRAP); + constant("_SIGIOT", SIGIOT); + constant("_SIGABRT", SIGABRT); + constant("_SIGEMT", SIGEMT); + constant("_SIGFPE", SIGFPE); + constant("_SIGKILL", SIGKILL); + constant("_SIGBUS", SIGBUS); + constant("_SIGSEGV", SIGSEGV); + constant("_SIGSYS", SIGSYS); + constant("_SIGPIPE", SIGPIPE); + constant("_SIGALRM", SIGALRM); + constant("_SIGTERM", SIGTERM); + constant("_SIGUSR1", SIGUSR1); + constant("_SIGUSR2", SIGUSR2); + constant("_SIGCHLD", SIGCHLD); + constant("_SIGPWR", SIGPWR); + constant("_SIGWINCH", SIGWINCH); + constant("_SIGURG", SIGURG); + constant("_SIGIO", SIGIO); + constant("_SIGSTOP", SIGSTOP); + constant("_SIGTSTP", SIGTSTP); + constant("_SIGCONT", SIGCONT); + constant("_SIGTTIN", SIGTTIN); + constant("_SIGTTOU", SIGTTOU); + constant("_SIGVTALRM", SIGVTALRM); + constant("_SIGPROF", SIGPROF); + constant("_SIGXCPU", SIGXCPU); + constant("_SIGXFSZ", SIGXFSZ); linefeed; } void output_irq_cpustat_t_defines(void) { - text("/* Linux irq_cpustat_t offsets. */"); - offset("#define IC_SOFTIRQ_PENDING ", irq_cpustat_t, __softirq_pending); - size("#define IC_IRQ_CPUSTAT_T ", irq_cpustat_t); + text("Linux irq_cpustat_t offsets."); + offset("IC_SOFTIRQ_PENDING", irq_cpustat_t, __softirq_pending); + size("IC_IRQ_CPUSTAT_T", irq_cpustat_t); linefeed; } diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c new file mode 100644 index 00000000000..df4acb68bfb --- /dev/null +++ b/arch/mips/kernel/cevt-ds1287.c @@ -0,0 +1,129 @@ +/* + * DS1287 clockevent driver + * + * Copyright (C) 2008 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <linux/clockchips.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/mc146818rtc.h> + +#include <asm/time.h> + +int ds1287_timer_state(void) +{ + return (CMOS_READ(RTC_REG_C) & RTC_PF) != 0; +} + +int ds1287_set_base_clock(unsigned int hz) +{ + u8 rate; + + switch (hz) { + case 128: + rate = 0x9; + break; + case 256: + rate = 0x8; + break; + case 1024: + rate = 0x6; + break; + default: + return -EINVAL; + } + + CMOS_WRITE(RTC_REF_CLCK_32KHZ | rate, RTC_REG_A); + + return 0; +} + +static int ds1287_set_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + return -EINVAL; +} + +static void ds1287_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + u8 val; + + spin_lock(&rtc_lock); + + val = CMOS_READ(RTC_REG_B); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + val |= RTC_PIE; + break; + default: + val &= ~RTC_PIE; + break; + } + + CMOS_WRITE(val, RTC_REG_B); + + spin_unlock(&rtc_lock); +} + +static void ds1287_event_handler(struct clock_event_device *dev) +{ +} + +static struct clock_event_device ds1287_clockevent = { + .name = "ds1287", + .features = CLOCK_EVT_FEAT_PERIODIC, + .cpumask = CPU_MASK_CPU0, + .set_next_event = ds1287_set_next_event, + .set_mode = ds1287_set_mode, + .event_handler = ds1287_event_handler, +}; + +static irqreturn_t ds1287_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *cd = &ds1287_clockevent; + + /* Ack the RTC interrupt. */ + CMOS_READ(RTC_REG_C); + + cd->event_handler(cd); + + return IRQ_HANDLED; +} + +static struct irqaction ds1287_irqaction = { + .handler = ds1287_interrupt, + .flags = IRQF_DISABLED | IRQF_PERCPU, + .name = "ds1287", +}; + +int __init ds1287_clockevent_init(int irq) +{ + struct clock_event_device *cd; + + cd = &ds1287_clockevent; + cd->rating = 100; + cd->irq = irq; + clockevent_set_clock(cd, 32768); + cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); + cd->min_delta_ns = clockevent_delta2ns(0x300, cd); + + clockevents_register_device(&ds1287_clockevent); + + return setup_irq(irq, &ds1287_irqaction); +} diff --git a/arch/mips/kernel/cevt-gt641xx.c b/arch/mips/kernel/cevt-gt641xx.c index c36772631fe..6e2f58520af 100644 --- a/arch/mips/kernel/cevt-gt641xx.c +++ b/arch/mips/kernel/cevt-gt641xx.c @@ -25,8 +25,6 @@ #include <asm/gt64120.h> #include <asm/time.h> -#include <irq.h> - static DEFINE_SPINLOCK(gt641xx_timer_lock); static unsigned int gt641xx_base_clock; diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 89c3304cb93..335a6ae3d59 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -169,6 +169,7 @@ static inline void check_wait(void) case CPU_24K: case CPU_34K: + case CPU_1004K: cpu_wait = r4k_wait; if (read_c0_config7() & MIPS_CONF7_WII) cpu_wait = r4k_wait_irqoff; @@ -675,6 +676,12 @@ static void __cpuinit decode_configs(struct cpuinfo_mips *c) return; } +#ifdef CONFIG_CPU_MIPSR2 +extern void spram_config(void); +#else +static inline void spram_config(void) {} +#endif + static inline void cpu_probe_mips(struct cpuinfo_mips *c) { decode_configs(c); @@ -711,7 +718,12 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c) case PRID_IMP_74K: c->cputype = CPU_74K; break; + case PRID_IMP_1004K: + c->cputype = CPU_1004K; + break; } + + spram_config(); } static inline void cpu_probe_alchemy(struct cpuinfo_mips *c) @@ -778,7 +790,7 @@ static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c) } } -static inline void cpu_probe_philips(struct cpuinfo_mips *c) +static inline void cpu_probe_nxp(struct cpuinfo_mips *c) { decode_configs(c); switch (c->processor_id & 0xff00) { @@ -787,7 +799,7 @@ static inline void cpu_probe_philips(struct cpuinfo_mips *c) c->isa_level = MIPS_CPU_ISA_M32R1; break; default: - panic("Unknown Philips Core!"); /* REVISIT: die? */ + panic("Unknown NXP Core!"); /* REVISIT: die? */ break; } } @@ -876,6 +888,7 @@ static __cpuinit const char *cpu_to_name(struct cpuinfo_mips *c) case CPU_24K: name = "MIPS 24K"; break; case CPU_25KF: name = "MIPS 25Kf"; break; case CPU_34K: name = "MIPS 34K"; break; + case CPU_1004K: name = "MIPS 1004K"; break; case CPU_74K: name = "MIPS 74K"; break; case CPU_VR4111: name = "NEC VR4111"; break; case CPU_VR4121: name = "NEC VR4121"; break; @@ -925,8 +938,8 @@ __cpuinit void cpu_probe(void) case PRID_COMP_SANDCRAFT: cpu_probe_sandcraft(c); break; - case PRID_COMP_PHILIPS: - cpu_probe_philips(c); + case PRID_COMP_NXP: + cpu_probe_nxp(c); break; default: c->cputype = CPU_UNKNOWN; diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c new file mode 100644 index 00000000000..1d5f63cf899 --- /dev/null +++ b/arch/mips/kernel/csrc-ioasic.c @@ -0,0 +1,65 @@ +/* + * DEC I/O ASIC's counter clocksource + * + * Copyright (C) 2008 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <linux/clocksource.h> +#include <linux/init.h> + +#include <asm/ds1287.h> +#include <asm/time.h> +#include <asm/dec/ioasic.h> +#include <asm/dec/ioasic_addrs.h> + +static cycle_t dec_ioasic_hpt_read(void) +{ + return ioasic_read(IO_REG_FCTR); +} + +static struct clocksource clocksource_dec = { + .name = "dec-ioasic", + .read = dec_ioasic_hpt_read, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +void __init dec_ioasic_clocksource_init(void) +{ + unsigned int freq; + u32 start, end; + int i = HZ / 10; + + + while (!ds1287_timer_state()) + ; + + start = dec_ioasic_hpt_read(); + + while (i--) + while (!ds1287_timer_state()) + ; + + end = dec_ioasic_hpt_read(); + + freq = (end - start) * 10; + printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq); + + clocksource_dec.rating = 200 + freq / 10000000; + clocksource_set_clock(&clocksource_dec, freq); + + clocksource_register(&clocksource_dec); +} diff --git a/arch/mips/kernel/gpio_txx9.c b/arch/mips/kernel/gpio_txx9.c new file mode 100644 index 00000000000..b1436a85799 --- /dev/null +++ b/arch/mips/kernel/gpio_txx9.c @@ -0,0 +1,87 @@ +/* + * A gpio chip driver for TXx9 SoCs + * + * Copyright (C) 2008 Atsushi Nemoto <anemo@mba.ocn.ne.jp> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/gpio.h> +#include <linux/errno.h> +#include <linux/io.h> +#include <asm/txx9pio.h> + +static DEFINE_SPINLOCK(txx9_gpio_lock); + +static struct txx9_pio_reg __iomem *txx9_pioptr; + +static int txx9_gpio_get(struct gpio_chip *chip, unsigned int offset) +{ + return __raw_readl(&txx9_pioptr->din) & (1 << offset); +} + +static void txx9_gpio_set_raw(unsigned int offset, int value) +{ + u32 val; + val = __raw_readl(&txx9_pioptr->dout); + if (value) + val |= 1 << offset; + else + val &= ~(1 << offset); + __raw_writel(val, &txx9_pioptr->dout); +} + +static void txx9_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) +{ + unsigned long flags; + spin_lock_irqsave(&txx9_gpio_lock, flags); + txx9_gpio_set_raw(offset, value); + mmiowb(); + spin_unlock_irqrestore(&txx9_gpio_lock, flags); +} + +static int txx9_gpio_dir_in(struct gpio_chip *chip, unsigned int offset) +{ + spin_lock_irq(&txx9_gpio_lock); + __raw_writel(__raw_readl(&txx9_pioptr->dir) & ~(1 << offset), + &txx9_pioptr->dir); + mmiowb(); + spin_unlock_irq(&txx9_gpio_lock); + return 0; +} + +static int txx9_gpio_dir_out(struct gpio_chip *chip, unsigned int offset, + int value) +{ + spin_lock_irq(&txx9_gpio_lock); + txx9_gpio_set_raw(offset, value); + __raw_writel(__raw_readl(&txx9_pioptr->dir) | (1 << offset), + &txx9_pioptr->dir); + mmiowb(); + spin_unlock_irq(&txx9_gpio_lock); + return 0; +} + +static struct gpio_chip txx9_gpio_chip = { + .get = txx9_gpio_get, + .set = txx9_gpio_set, + .direction_input = txx9_gpio_dir_in, + .direction_output = txx9_gpio_dir_out, + .label = "TXx9", +}; + +int __init txx9_gpio_init(unsigned long baseaddr, + unsigned int base, unsigned int num) +{ + txx9_pioptr = ioremap(baseaddr, sizeof(struct txx9_pio_reg)); + if (!txx9_pioptr) + return -ENODEV; + txx9_gpio_chip.base = base; + txx9_gpio_chip.ngpio = num; + return gpiochip_add(&txx9_gpio_chip); +} diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c new file mode 100644 index 00000000000..f0a4bb19e09 --- /dev/null +++ b/arch/mips/kernel/irq-gic.c @@ -0,0 +1,295 @@ +#undef DEBUG + +#include <linux/bitmap.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <asm/gic.h> +#include <asm/gcmpregs.h> +#include <asm/mips-boards/maltaint.h> +#include <asm/irq.h> +#include <linux/hardirq.h> +#include <asm-generic/bitops/find.h> + + +static unsigned long _gic_base; +static unsigned int _irqbase, _mapsize, numvpes, numintrs; +static struct gic_intr_map *_intrmap; + +static struct gic_pcpu_mask pcpu_masks[NR_CPUS]; +static struct gic_pending_regs pending_regs[NR_CPUS]; +static struct gic_intrmask_regs intrmask_regs[NR_CPUS]; + +#define gic_wedgeb2bok 0 /* + * Can GIC handle b2b writes to wedge register? + */ +#if gic_wedgeb2bok == 0 +static DEFINE_SPINLOCK(gic_wedgeb2b_lock); +#endif + +void gic_send_ipi(unsigned int intr) +{ +#if gic_wedgeb2bok == 0 + unsigned long flags; +#endif + pr_debug("CPU%d: %s status %08x\n", smp_processor_id(), __func__, + read_c0_status()); + if (!gic_wedgeb2bok) + spin_lock_irqsave(&gic_wedgeb2b_lock, flags); + GICWRITE(GIC_REG(SHARED, GIC_SH_WEDGE), 0x80000000 | intr); + if (!gic_wedgeb2bok) { + (void) GIC_REG(SHARED, GIC_SH_CONFIG); + spin_unlock_irqrestore(&gic_wedgeb2b_lock, flags); + } +} + +/* This is Malta specific and needs to be exported */ +static void vpe_local_setup(unsigned int numvpes) +{ + int i; + unsigned long timer_interrupt = 5, perf_interrupt = 5; + unsigned int vpe_ctl; + + /* + * Setup the default performance counter timer interrupts + * for all VPEs + */ + for (i = 0; i < numvpes; i++) { + GICWRITE(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR), i); + + /* Are Interrupts locally routable? */ + GICREAD(GIC_REG(VPE_OTHER, GIC_VPE_CTL), vpe_ctl); + if (vpe_ctl & GIC_VPE_CTL_TIMER_RTBL_MSK) + GICWRITE(GIC_REG(VPE_OTHER, GIC_VPE_TIMER_MAP), + GIC_MAP_TO_PIN_MSK | timer_interrupt); + + if (vpe_ctl & GIC_VPE_CTL_PERFCNT_RTBL_MSK) + GICWRITE(GIC_REG(VPE_OTHER, GIC_VPE_PERFCTR_MAP), + GIC_MAP_TO_PIN_MSK | perf_interrupt); + } +} + +unsigned int gic_get_int(void) +{ + unsigned int i; + unsigned long *pending, *intrmask, *pcpu_mask; + unsigned long *pending_abs, *intrmask_abs; + + /* Get per-cpu bitmaps */ + pending = pending_regs[smp_processor_id()].pending; + intrmask = intrmask_regs[smp_processor_id()].intrmask; + pcpu_mask = pcpu_masks[smp_processor_id()].pcpu_mask; + + pending_abs = (unsigned long *) GIC_REG_ABS_ADDR(SHARED, + GIC_SH_PEND_31_0_OFS); + intrmask_abs = (unsigned long *) GIC_REG_ABS_ADDR(SHARED, + GIC_SH_MASK_31_0_OFS); + + for (i = 0; i < BITS_TO_LONGS(GIC_NUM_INTRS); i++) { + GICREAD(*pending_abs, pending[i]); + GICREAD(*intrmask_abs, intrmask[i]); + pending_abs++; + intrmask_abs++; + } + + bitmap_and(pending, pending, intrmask, GIC_NUM_INTRS); + bitmap_and(pending, pending, pcpu_mask, GIC_NUM_INTRS); + + i = find_first_bit(pending, GIC_NUM_INTRS); + + pr_debug("CPU%d: %s pend=%d\n", smp_processor_id(), __func__, i); + + return i; +} + +static unsigned int gic_irq_startup(unsigned int irq) +{ + pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq); + irq -= _irqbase; + /* FIXME: this is wrong for !GICISWORDLITTLEENDIAN */ + GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_SMASK_31_0_OFS + (irq / 32))), + 1 << (irq % 32)); + return 0; +} + +static void gic_irq_ack(unsigned int irq) +{ +#if gic_wedgeb2bok == 0 + unsigned long flags; +#endif + pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq); + irq -= _irqbase; + GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_RMASK_31_0_OFS + (irq / 32))), + 1 << (irq % 32)); + + if (_intrmap[irq].trigtype == GIC_TRIG_EDGE) { + if (!gic_wedgeb2bok) + spin_lock_irqsave(&gic_wedgeb2b_lock, flags); + GICWRITE(GIC_REG(SHARED, GIC_SH_WEDGE), irq); + if (!gic_wedgeb2bok) { + (void) GIC_REG(SHARED, GIC_SH_CONFIG); + spin_unlock_irqrestore(&gic_wedgeb2b_lock, flags); + } + } +} + +static void gic_mask_irq(unsigned int irq) +{ + pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq); + irq -= _irqbase; + /* FIXME: this is wrong for !GICISWORDLITTLEENDIAN */ + GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_RMASK_31_0_OFS + (irq / 32))), + 1 << (irq % 32)); +} + +static void gic_unmask_irq(unsigned int irq) +{ + pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq); + irq -= _irqbase; + /* FIXME: this is wrong for !GICISWORDLITTLEENDIAN */ + GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_SMASK_31_0_OFS + (irq / 32))), + 1 << (irq % 32)); +} + +#ifdef CONFIG_SMP + +static DEFINE_SPINLOCK(gic_lock); + +static void gic_set_affinity(unsigned int irq, cpumask_t cpumask) +{ + cpumask_t tmp = CPU_MASK_NONE; + unsigned long flags; + int i; + + pr_debug(KERN_DEBUG "%s called\n", __func__); + irq -= _irqbase; + + cpus_and(tmp, cpumask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + /* Assumption : cpumask refers to a single CPU */ + spin_lock_irqsave(&gic_lock, flags); + for (;;) { + /* Re-route this IRQ */ + GIC_SH_MAP_TO_VPE_SMASK(irq, first_cpu(tmp)); + + /* + * FIXME: assumption that _intrmap is ordered and has no holes + */ + + /* Update the intr_map */ + _intrmap[irq].cpunum = first_cpu(tmp); + + /* Update the pcpu_masks */ + for (i = 0; i < NR_CPUS; i++) + clear_bit(irq, pcpu_masks[i].pcpu_mask); + set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask); + + } + irq_desc[irq].affinity = cpumask; + spin_unlock_irqrestore(&gic_lock, flags); + +} +#endif + +static struct irq_chip gic_irq_controller = { + .name = "MIPS GIC", + .startup = gic_irq_startup, + .ack = gic_irq_ack, + .mask = gic_mask_irq, + .mask_ack = gic_mask_irq, + .unmask = gic_unmask_irq, + .eoi = gic_unmask_irq, +#ifdef CONFIG_SMP + .set_affinity = gic_set_affinity, +#endif +}; + +static void __init setup_intr(unsigned int intr, unsigned int cpu, + unsigned int pin, unsigned int polarity, unsigned int trigtype) +{ + /* Setup Intr to Pin mapping */ + if (pin & GIC_MAP_TO_NMI_MSK) { + GICWRITE(GIC_REG_ADDR(SHARED, GIC_SH_MAP_TO_PIN(intr)), pin); + /* FIXME: hack to route NMI to all cpu's */ + for (cpu = 0; cpu < NR_CPUS; cpu += 32) { + GICWRITE(GIC_REG_ADDR(SHARED, + GIC_SH_MAP_TO_VPE_REG_OFF(intr, cpu)), + 0xffffffff); + } + } else { + GICWRITE(GIC_REG_ADDR(SHARED, GIC_SH_MAP_TO_PIN(intr)), + GIC_MAP_TO_PIN_MSK | pin); + /* Setup Intr to CPU mapping */ + GIC_SH_MAP_TO_VPE_SMASK(intr, cpu); + } + + /* Setup Intr Polarity */ + GIC_SET_POLARITY(intr, polarity); + + /* Setup Intr Trigger Type */ + GIC_SET_TRIGGER(intr, trigtype); + + /* Init Intr Masks */ + GIC_SET_INTR_MASK(intr, 0); +} + +static void __init gic_basic_init(void) +{ + unsigned int i, cpu; + + /* Setup defaults */ + for (i = 0; i < GIC_NUM_INTRS; i++) { + GIC_SET_POLARITY(i, GIC_POL_POS); + GIC_SET_TRIGGER(i, GIC_TRIG_LEVEL); + GIC_SET_INTR_MASK(i, 0); + } + + /* Setup specifics */ + for (i = 0; i < _mapsize; i++) { + cpu = _intrmap[i].cpunum; + if (cpu == X) + continue; + + setup_intr(_intrmap[i].intrnum, + _intrmap[i].cpunum, + _intrmap[i].pin, + _intrmap[i].polarity, + _intrmap[i].trigtype); + /* Initialise per-cpu Interrupt software masks */ + if (_intrmap[i].ipiflag) + set_bit(_intrmap[i].intrnum, pcpu_masks[cpu].pcpu_mask); + } + + vpe_local_setup(numvpes); + + for (i = _irqbase; i < (_irqbase + numintrs); i++) + set_irq_chip(i, &gic_irq_controller); +} + +void __init gic_init(unsigned long gic_base_addr, + unsigned long gic_addrspace_size, + struct gic_intr_map *intr_map, unsigned int intr_map_size, + unsigned int irqbase) +{ + unsigned int gicconfig; + + _gic_base = (unsigned long) ioremap_nocache(gic_base_addr, + gic_addrspace_size); + _irqbase = irqbase; + _intrmap = intr_map; + _mapsize = intr_map_size; + + GICREAD(GIC_REG(SHARED, GIC_SH_CONFIG), gicconfig); + numintrs = (gicconfig & GIC_SH_CONFIG_NUMINTRS_MSK) >> + GIC_SH_CONFIG_NUMINTRS_SHF; + numintrs = ((numintrs + 1) * 8); + + numvpes = (gicconfig & GIC_SH_CONFIG_NUMVPES_MSK) >> + GIC_SH_CONFIG_NUMVPES_SHF; + + pr_debug("%s called\n", __func__); + + gic_basic_init(); +} diff --git a/arch/mips/kernel/irq-msc01.c b/arch/mips/kernel/irq-msc01.c index 4edc7e451d9..963c16d266a 100644 --- a/arch/mips/kernel/irq-msc01.c +++ b/arch/mips/kernel/irq-msc01.c @@ -17,6 +17,7 @@ #include <asm/io.h> #include <asm/irq.h> #include <asm/msc01_ic.h> +#include <asm/traps.h> static unsigned long _icctrl_msc; #define MSC01_IC_REG_BASE _icctrl_msc @@ -98,14 +99,13 @@ void ll_msc_irq(void) } } -void -msc_bind_eic_interrupt(unsigned int irq, unsigned int set) +static void msc_bind_eic_interrupt(int irq, int set) { MSCIC_WRITE(MSC01_IC_RAMW, (irq<<MSC01_IC_RAMW_ADDR_SHF) | (set<<MSC01_IC_RAMW_DATA_SHF)); } -struct irq_chip msc_levelirq_type = { +static struct irq_chip msc_levelirq_type = { .name = "SOC-it-Level", .ack = level_mask_and_ack_msc_irq, .mask = mask_msc_irq, @@ -115,7 +115,7 @@ struct irq_chip msc_levelirq_type = { .end = end_msc_irq, }; -struct irq_chip msc_edgeirq_type = { +static struct irq_chip msc_edgeirq_type = { .name = "SOC-it-Edge", .ack = edge_mask_and_ack_msc_irq, .mask = mask_msc_irq, @@ -128,8 +128,6 @@ struct irq_chip msc_edgeirq_type = { void __init init_msc_irqs(unsigned long icubase, unsigned int irqbase, msc_irqmap_t *imp, int nirq) { - extern void (*board_bind_eic_interrupt)(unsigned int irq, unsigned int regset); - _icctrl_msc = (unsigned long) ioremap(icubase, 0x40000); /* Reset interrupt controller - initialises all registers to 0 */ diff --git a/arch/mips/kernel/signal-common.h b/arch/mips/kernel/signal-common.h index c0faabd5201..6c8e8c4246f 100644 --- a/arch/mips/kernel/signal-common.h +++ b/arch/mips/kernel/signal-common.h @@ -14,7 +14,7 @@ /* #define DEBUG_SIG */ #ifdef DEBUG_SIG -# define DEBUGP(fmt, args...) printk("%s: " fmt, __FUNCTION__ , ##args) +# define DEBUGP(fmt, args...) printk("%s: " fmt, __func__, ##args) #else # define DEBUGP(fmt, args...) #endif diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c new file mode 100644 index 00000000000..ca476c4f62a --- /dev/null +++ b/arch/mips/kernel/smp-cmp.c @@ -0,0 +1,265 @@ +/* + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Copyright (C) 2007 MIPS Technologies, Inc. + * Chris Dearman (chris@mips.com) + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/cpumask.h> +#include <linux/interrupt.h> +#include <linux/compiler.h> + +#include <asm/atomic.h> +#include <asm/cacheflush.h> +#include <asm/cpu.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/hardirq.h> +#include <asm/mmu_context.h> +#include <asm/smp.h> +#include <asm/time.h> +#include <asm/mipsregs.h> +#include <asm/mipsmtregs.h> +#include <asm/mips_mt.h> + +/* + * Crude manipulation of the CPU masks to control which + * which CPU's are brought online during initialisation + * + * Beware... this needs to be called after CPU discovery + * but before CPU bringup + */ +static int __init allowcpus(char *str) +{ + cpumask_t cpu_allow_map; + char buf[256]; + int len; + + cpus_clear(cpu_allow_map); + if (cpulist_parse(str, cpu_allow_map) == 0) { + cpu_set(0, cpu_allow_map); + cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map); + len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map); + buf[len] = '\0'; + pr_debug("Allowable CPUs: %s\n", buf); + return 1; + } else + return 0; +} +__setup("allowcpus=", allowcpus); + +static void ipi_call_function(unsigned int cpu) +{ + unsigned int action = 0; + + pr_debug("CPU%d: %s cpu %d status %08x\n", + smp_processor_id(), __func__, cpu, read_c0_status()); + + switch (cpu) { + case 0: + action = GIC_IPI_EXT_INTR_CALLFNC_VPE0; + break; + case 1: + action = GIC_IPI_EXT_INTR_CALLFNC_VPE1; + break; + case 2: + action = GIC_IPI_EXT_INTR_CALLFNC_VPE2; + break; + case 3: + action = GIC_IPI_EXT_INTR_CALLFNC_VPE3; + break; + } + gic_send_ipi(action); +} + + +static void ipi_resched(unsigned int cpu) +{ + unsigned int action = 0; + + pr_debug("CPU%d: %s cpu %d status %08x\n", + smp_processor_id(), __func__, cpu, read_c0_status()); + + switch (cpu) { + case 0: + action = GIC_IPI_EXT_INTR_RESCHED_VPE0; + break; + case 1: + action = GIC_IPI_EXT_INTR_RESCHED_VPE1; + break; + case 2: + action = GIC_IPI_EXT_INTR_RESCHED_VPE2; + break; + case 3: + action = GIC_IPI_EXT_INTR_RESCHED_VPE3; + break; + } + gic_send_ipi(action); +} + +/* + * FIXME: This isn't restricted to CMP + * The SMVP kernel could use GIC interrupts if available + */ +void cmp_send_ipi_single(int cpu, unsigned int action) +{ + unsigned long flags; + + local_irq_save(flags); + + switch (action) { + case SMP_CALL_FUNCTION: + ipi_call_function(cpu); + break; + + case SMP_RESCHEDULE_YOURSELF: + ipi_resched(cpu); + break; + } + + local_irq_restore(flags); +} + +static void cmp_send_ipi_mask(cpumask_t mask, unsigned int action) +{ + unsigned int i; + + for_each_cpu_mask(i, mask) + cmp_send_ipi_single(i, action); +} + +static void cmp_init_secondary(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + + /* Assume GIC is present */ + change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 | STATUSF_IP6 | + STATUSF_IP7); + + /* Enable per-cpu interrupts: platform specific */ + + c->core = (read_c0_ebase() >> 1) & 0xff; +#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC) + c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE; +#endif +#ifdef CONFIG_MIPS_MT_SMTC + c->tc_id = (read_c0_tcbind() >> TCBIND_CURTC_SHIFT) & TCBIND_CURTC; +#endif +} + +static void cmp_smp_finish(void) +{ + pr_debug("SMPCMP: CPU%d: %s\n", smp_processor_id(), __func__); + + /* CDFIXME: remove this? */ + write_c0_compare(read_c0_count() + (8 * mips_hpt_frequency / HZ)); + +#ifdef CONFIG_MIPS_MT_FPAFF + /* If we have an FPU, enroll ourselves in the FPU-full mask */ + if (cpu_has_fpu) + cpu_set(smp_processor_id(), mt_fpu_cpumask); +#endif /* CONFIG_MIPS_MT_FPAFF */ + + local_irq_enable(); +} + +static void cmp_cpus_done(void) +{ + pr_debug("SMPCMP: CPU%d: %s\n", smp_processor_id(), __func__); +} + +/* + * Setup the PC, SP, and GP of a secondary processor and start it running + * smp_bootstrap is the place to resume from + * __KSTK_TOS(idle) is apparently the stack pointer + * (unsigned long)idle->thread_info the gp + */ +static void cmp_boot_secondary(int cpu, struct task_struct *idle) +{ + struct thread_info *gp = task_thread_info(idle); + unsigned long sp = __KSTK_TOS(idle); + unsigned long pc = (unsigned long)&smp_bootstrap; + unsigned long a0 = 0; + + pr_debug("SMPCMP: CPU%d: %s cpu %d\n", smp_processor_id(), + __func__, cpu); + +#if 0 + /* Needed? */ + flush_icache_range((unsigned long)gp, + (unsigned long)(gp + sizeof(struct thread_info))); +#endif + + amon_cpu_start(cpu, pc, sp, gp, a0); +} + +/* + * Common setup before any secondaries are started + */ +void __init cmp_smp_setup(void) +{ + int i; + int ncpu = 0; + + pr_debug("SMPCMP: CPU%d: %s\n", smp_processor_id(), __func__); + +#ifdef CONFIG_MIPS_MT_FPAFF + /* If we have an FPU, enroll ourselves in the FPU-full mask */ + if (cpu_has_fpu) + cpu_set(0, mt_fpu_cpumask); +#endif /* CONFIG_MIPS_MT_FPAFF */ + + for (i = 1; i < NR_CPUS; i++) { + if (amon_cpu_avail(i)) { + cpu_set(i, phys_cpu_present_map); + __cpu_number_map[i] = ++ncpu; + __cpu_logical_map[ncpu] = i; + } + } + + if (cpu_has_mipsmt) { + unsigned int nvpe, mvpconf0 = read_c0_mvpconf0(); + + nvpe = ((mvpconf0 & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1; + smp_num_siblings = nvpe; + } + pr_info("Detected %i available secondary CPU(s)\n", ncpu); +} + +void __init cmp_prepare_cpus(unsigned int max_cpus) +{ + pr_debug("SMPCMP: CPU%d: %s max_cpus=%d\n", + smp_processor_id(), __func__, max_cpus); + + /* + * FIXME: some of these options are per-system, some per-core and + * some per-cpu + */ + mips_mt_set_cpuoptions(); +} + +struct plat_smp_ops cmp_smp_ops = { + .send_ipi_single = cmp_send_ipi_single, + .send_ipi_mask = cmp_send_ipi_mask, + .init_secondary = cmp_init_secondary, + .smp_finish = cmp_smp_finish, + .cpus_done = cmp_cpus_done, + .boot_secondary = cmp_boot_secondary, + .smp_setup = cmp_smp_setup, + .prepare_cpus = cmp_prepare_cpus, +}; diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 89e6f6aa516..87a1816c1f4 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -36,110 +36,7 @@ #include <asm/mipsmtregs.h> #include <asm/mips_mt.h> -#define MIPS_CPU_IPI_RESCHED_IRQ 0 -#define MIPS_CPU_IPI_CALL_IRQ 1 - -static int cpu_ipi_resched_irq, cpu_ipi_call_irq; - -#if 0 -static void dump_mtregisters(int vpe, int tc) -{ - printk("vpe %d tc %d\n", vpe, tc); - - settc(tc); - - printk(" c0 status 0x%lx\n", read_vpe_c0_status()); - printk(" vpecontrol 0x%lx\n", read_vpe_c0_vpecontrol()); - printk(" vpeconf0 0x%lx\n", read_vpe_c0_vpeconf0()); - printk(" tcstatus 0x%lx\n", read_tc_c0_tcstatus()); - printk(" tcrestart 0x%lx\n", read_tc_c0_tcrestart()); - printk(" tcbind 0x%lx\n", read_tc_c0_tcbind()); - printk(" tchalt 0x%lx\n", read_tc_c0_tchalt()); -} -#endif - -void __init sanitize_tlb_entries(void) -{ - int i, tlbsiz; - unsigned long mvpconf0, ncpu; - - if (!cpu_has_mipsmt) - return; - - /* Enable VPC */ - set_c0_mvpcontrol(MVPCONTROL_VPC); - - back_to_back_c0_hazard(); - - /* Disable TLB sharing */ - clear_c0_mvpcontrol(MVPCONTROL_STLB); - - mvpconf0 = read_c0_mvpconf0(); - - printk(KERN_INFO "MVPConf0 0x%lx TLBS %lx PTLBE %ld\n", mvpconf0, - (mvpconf0 & MVPCONF0_TLBS) >> MVPCONF0_TLBS_SHIFT, - (mvpconf0 & MVPCONF0_PTLBE) >> MVPCONF0_PTLBE_SHIFT); - - tlbsiz = (mvpconf0 & MVPCONF0_PTLBE) >> MVPCONF0_PTLBE_SHIFT; - ncpu = ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1; - - printk(" tlbsiz %d ncpu %ld\n", tlbsiz, ncpu); - - if (tlbsiz > 0) { - /* share them out across the vpe's */ - tlbsiz /= ncpu; - - printk(KERN_INFO "setting Config1.MMU_size to %d\n", tlbsiz); - - for (i = 0; i < ncpu; i++) { - settc(i); - - if (i == 0) - write_c0_config1((read_c0_config1() & ~(0x3f << 25)) | (tlbsiz << 25)); - else - write_vpe_c0_config1((read_vpe_c0_config1() & ~(0x3f << 25)) | - (tlbsiz << 25)); - } - } - - clear_c0_mvpcontrol(MVPCONTROL_VPC); -} - -static void ipi_resched_dispatch(void) -{ - do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ); -} - -static void ipi_call_dispatch(void) -{ - do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ); -} - -static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) -{ - return IRQ_HANDLED; -} - -static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) -{ - smp_call_function_interrupt(); - - return IRQ_HANDLED; -} - -static struct irqaction irq_resched = { - .handler = ipi_resched_interrupt, - .flags = IRQF_DISABLED|IRQF_PERCPU, - .name = "IPI_resched" -}; - -static struct irqaction irq_call = { - .handler = ipi_call_interrupt, - .flags = IRQF_DISABLED|IRQF_PERCPU, - .name = "IPI_call" -}; - -static void __init smp_copy_vpe_config(void) +static void __init smvp_copy_vpe_config(void) { write_vpe_c0_status( (read_c0_status() & ~(ST0_IM | ST0_IE | ST0_KSU)) | ST0_CU0); @@ -156,7 +53,7 @@ static void __init smp_copy_vpe_config(void) write_vpe_c0_count(read_c0_count()); } -static unsigned int __init smp_vpe_init(unsigned int tc, unsigned int mvpconf0, +static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0, unsigned int ncpu) { if (tc > ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT)) @@ -182,12 +79,12 @@ static unsigned int __init smp_vpe_init(unsigned int tc, unsigned int mvpconf0, write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() & ~VPECONTROL_TE); if (tc != 0) - smp_copy_vpe_config(); + smvp_copy_vpe_config(); return ncpu; } -static void __init smp_tc_init(unsigned int tc, unsigned int mvpconf0) +static void __init smvp_tc_init(unsigned int tc, unsigned int mvpconf0) { unsigned long tmp; @@ -254,15 +151,20 @@ static void vsmp_send_ipi_mask(cpumask_t mask, unsigned int action) static void __cpuinit vsmp_init_secondary(void) { - /* Enable per-cpu interrupts */ + extern int gic_present; /* This is Malta specific: IPI,performance and timer inetrrupts */ - write_c0_status((read_c0_status() & ~ST0_IM ) | - (STATUSF_IP0 | STATUSF_IP1 | STATUSF_IP6 | STATUSF_IP7)); + if (gic_present) + change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 | + STATUSF_IP6 | STATUSF_IP7); + else + change_c0_status(ST0_IM, STATUSF_IP0 | STATUSF_IP1 | + STATUSF_IP6 | STATUSF_IP7); } static void __cpuinit vsmp_smp_finish(void) { + /* CDFIXME: remove this? */ write_c0_compare(read_c0_count() + (8* mips_hpt_frequency/HZ)); #ifdef CONFIG_MIPS_MT_FPAFF @@ -323,7 +225,7 @@ static void __cpuinit vsmp_boot_secondary(int cpu, struct task_struct *idle) /* * Common setup before any secondaries are started * Make sure all CPU's are in a sensible state before we boot any of the - * secondarys + * secondaries */ static void __init vsmp_smp_setup(void) { @@ -356,8 +258,8 @@ static void __init vsmp_smp_setup(void) for (tc = 0; tc <= ntc; tc++) { settc(tc); - smp_tc_init(tc, mvpconf0); - ncpu = smp_vpe_init(tc, mvpconf0, ncpu); + smvp_tc_init(tc, mvpconf0); + ncpu = smvp_vpe_init(tc, mvpconf0, ncpu); } /* Release config state */ @@ -371,21 +273,6 @@ static void __init vsmp_smp_setup(void) static void __init vsmp_prepare_cpus(unsigned int max_cpus) { mips_mt_set_cpuoptions(); - - /* set up ipi interrupts */ - if (cpu_has_vint) { - set_vi_handler(MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch); - set_vi_handler(MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch); - } - - cpu_ipi_resched_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ; - cpu_ipi_call_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ; - - setup_irq(cpu_ipi_resched_irq, &irq_resched); - setup_irq(cpu_ipi_call_irq, &irq_call); - - set_irq_handler(cpu_ipi_resched_irq, handle_percpu_irq); - set_irq_handler(cpu_ipi_call_irq, handle_percpu_irq); } struct plat_smp_ops vsmp_smp_ops = { diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 9d41dab90a8..33780cc61ce 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -35,6 +35,7 @@ #include <asm/atomic.h> #include <asm/cpu.h> #include <asm/processor.h> +#include <asm/r4k-timer.h> #include <asm/system.h> #include <asm/mmu_context.h> #include <asm/time.h> @@ -125,6 +126,8 @@ asmlinkage __cpuinit void start_secondary(void) cpu_set(cpu, cpu_callin_map); + synchronise_count_slave(); + cpu_idle(); } @@ -287,6 +290,7 @@ void smp_send_stop(void) void __init smp_cpus_done(unsigned int max_cpus) { mp_ops->cpus_done(); + synchronise_count_master(); } /* called from main before smp_init() */ diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index b42e71c7111..3e863186cd2 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -174,14 +174,6 @@ static int clock_hang_reported[NR_CPUS]; #endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */ -/* Initialize shared TLB - the should probably migrate to smtc_setup_cpus() */ - -void __init sanitize_tlb_entries(void) -{ - printk("Deprecated sanitize_tlb_entries() invoked\n"); -} - - /* * Configure shared TLB - VPC configuration bit must be set by caller */ @@ -339,7 +331,8 @@ static void smtc_tc_setup(int vpe, int tc, int cpu) /* In general, all TCs should have the same cpu_data indications */ memcpy(&cpu_data[cpu], &cpu_data[0], sizeof(struct cpuinfo_mips)); /* For 34Kf, start with TC/CPU 0 as sole owner of single FPU context */ - if (cpu_data[0].cputype == CPU_34K) + if (cpu_data[0].cputype == CPU_34K || + cpu_data[0].cputype == CPU_1004K) cpu_data[cpu].options &= ~MIPS_CPU_FPU; cpu_data[cpu].vpe_id = vpe; cpu_data[cpu].tc_id = tc; diff --git a/arch/mips/kernel/spram.c b/arch/mips/kernel/spram.c new file mode 100644 index 00000000000..6ddb507a87e --- /dev/null +++ b/arch/mips/kernel/spram.c @@ -0,0 +1,221 @@ +/* + * MIPS SPRAM support + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Copyright (C) 2007, 2008 MIPS Technologies, Inc. + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/ptrace.h> +#include <linux/stddef.h> + +#include <asm/cpu.h> +#include <asm/fpu.h> +#include <asm/mipsregs.h> +#include <asm/system.h> +#include <asm/r4kcache.h> +#include <asm/hazards.h> + +/* + * These definitions are correct for the 24K/34K/74K SPRAM sample + * implementation. The 4KS interpreted the tags differently... + */ +#define SPRAM_TAG0_ENABLE 0x00000080 +#define SPRAM_TAG0_PA_MASK 0xfffff000 +#define SPRAM_TAG1_SIZE_MASK 0xfffff000 + +#define SPRAM_TAG_STRIDE 8 + +#define ERRCTL_SPRAM (1 << 28) + +/* errctl access */ +#define read_c0_errctl(x) read_c0_ecc(x) +#define write_c0_errctl(x) write_c0_ecc(x) + +/* + * Different semantics to the set_c0_* function built by __BUILD_SET_C0 + */ +static __cpuinit unsigned int bis_c0_errctl(unsigned int set) +{ + unsigned int res; + res = read_c0_errctl(); + write_c0_errctl(res | set); + return res; +} + +static __cpuinit void ispram_store_tag(unsigned int offset, unsigned int data) +{ + unsigned int errctl; + + /* enable SPRAM tag access */ + errctl = bis_c0_errctl(ERRCTL_SPRAM); + ehb(); + + write_c0_taglo(data); + ehb(); + + cache_op(Index_Store_Tag_I, CKSEG0|offset); + ehb(); + + write_c0_errctl(errctl); + ehb(); +} + + +static __cpuinit unsigned int ispram_load_tag(unsigned int offset) +{ + unsigned int data; + unsigned int errctl; + + /* enable SPRAM tag access */ + errctl = bis_c0_errctl(ERRCTL_SPRAM); + ehb(); + cache_op(Index_Load_Tag_I, CKSEG0 | offset); + ehb(); + data = read_c0_taglo(); + ehb(); + write_c0_errctl(errctl); + ehb(); + + return data; +} + +static __cpuinit void dspram_store_tag(unsigned int offset, unsigned int data) +{ + unsigned int errctl; + + /* enable SPRAM tag access */ + errctl = bis_c0_errctl(ERRCTL_SPRAM); + ehb(); + write_c0_dtaglo(data); + ehb(); + cache_op(Index_Store_Tag_D, CKSEG0 | offset); + ehb(); + write_c0_errctl(errctl); + ehb(); +} + + +static __cpuinit unsigned int dspram_load_tag(unsigned int offset) +{ + unsigned int data; + unsigned int errctl; + + errctl = bis_c0_errctl(ERRCTL_SPRAM); + ehb(); + cache_op(Index_Load_Tag_D, CKSEG0 | offset); + ehb(); + data = read_c0_dtaglo(); + ehb(); + write_c0_errctl(errctl); + ehb(); + + return data; +} + +static __cpuinit void probe_spram(char *type, + unsigned int base, + unsigned int (*read)(unsigned int), + void (*write)(unsigned int, unsigned int)) +{ + unsigned int firstsize = 0, lastsize = 0; + unsigned int firstpa = 0, lastpa = 0, pa = 0; + unsigned int offset = 0; + unsigned int size, tag0, tag1; + unsigned int enabled; + int i; + + /* + * The limit is arbitrary but avoids the loop running away if + * the SPRAM tags are implemented differently + */ + + for (i = 0; i < 8; i++) { + tag0 = read(offset); + tag1 = read(offset+SPRAM_TAG_STRIDE); + pr_debug("DBG %s%d: tag0=%08x tag1=%08x\n", + type, i, tag0, tag1); + + size = tag1 & SPRAM_TAG1_SIZE_MASK; + + if (size == 0) + break; + + if (i != 0) { + /* tags may repeat... */ + if ((pa == firstpa && size == firstsize) || + (pa == lastpa && size == lastsize)) + break; + } + + /* Align base with size */ + base = (base + size - 1) & ~(size-1); + + /* reprogram the base address base address and enable */ + tag0 = (base & SPRAM_TAG0_PA_MASK) | SPRAM_TAG0_ENABLE; + write(offset, tag0); + + base += size; + + /* reread the tag */ + tag0 = read(offset); + pa = tag0 & SPRAM_TAG0_PA_MASK; + enabled = tag0 & SPRAM_TAG0_ENABLE; + + if (i == 0) { + firstpa = pa; + firstsize = size; + } + + lastpa = pa; + lastsize = size; + + if (strcmp(type, "DSPRAM") == 0) { + unsigned int *vp = (unsigned int *)(CKSEG1 | pa); + unsigned int v; +#define TDAT 0x5a5aa5a5 + vp[0] = TDAT; + vp[1] = ~TDAT; + + mb(); + + v = vp[0]; + if (v != TDAT) + printk(KERN_ERR "vp=%p wrote=%08x got=%08x\n", + vp, TDAT, v); + v = vp[1]; + if (v != ~TDAT) + printk(KERN_ERR "vp=%p wrote=%08x got=%08x\n", + vp+1, ~TDAT, v); + } + + pr_info("%s%d: PA=%08x,Size=%08x%s\n", + type, i, pa, size, enabled ? ",enabled" : ""); + offset += 2 * SPRAM_TAG_STRIDE; + } +} + +__cpuinit void spram_config(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config0; + + switch (c->cputype) { + case CPU_24K: + case CPU_34K: + case CPU_74K: + config0 = read_c0_config(); + /* FIXME: addresses are Malta specific */ + if (config0 & (1<<24)) { + probe_spram("ISPRAM", 0x1c000000, + &ispram_load_tag, &ispram_store_tag); + } + if (config0 & (1<<23)) + probe_spram("DSPRAM", 0x1c100000, + &dspram_load_tag, &dspram_store_tag); + } +} diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c new file mode 100644 index 00000000000..9021108eb9c --- /dev/null +++ b/arch/mips/kernel/sync-r4k.c @@ -0,0 +1,159 @@ +/* + * Count register synchronisation. + * + * All CPUs will have their count registers synchronised to the CPU0 expirelo + * value. This can cause a small timewarp for CPU0. All other CPU's should + * not have done anything significant (but they may have had interrupts + * enabled briefly - prom_smp_finish() should not be responsible for enabling + * interrupts...) + * + * FIXME: broken for SMTC + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/irqflags.h> +#include <linux/r4k-timer.h> + +#include <asm/atomic.h> +#include <asm/barrier.h> +#include <asm/cpumask.h> +#include <asm/mipsregs.h> + +static atomic_t __initdata count_start_flag = ATOMIC_INIT(0); +static atomic_t __initdata count_count_start = ATOMIC_INIT(0); +static atomic_t __initdata count_count_stop = ATOMIC_INIT(0); + +#define COUNTON 100 +#define NR_LOOPS 5 + +void __init synchronise_count_master(void) +{ + int i; + unsigned long flags; + unsigned int initcount; + int nslaves; + +#ifdef CONFIG_MIPS_MT_SMTC + /* + * SMTC needs to synchronise per VPE, not per CPU + * ignore for now + */ + return; +#endif + + pr_info("Checking COUNT synchronization across %u CPUs: ", + num_online_cpus()); + + local_irq_save(flags); + + /* + * Notify the slaves that it's time to start + */ + atomic_set(&count_start_flag, 1); + smp_wmb(); + + /* Count will be initialised to expirelo for all CPU's */ + initcount = expirelo; + + /* + * We loop a few times to get a primed instruction cache, + * then the last pass is more or less synchronised and + * the master and slaves each set their cycle counters to a known + * value all at once. This reduces the chance of having random offsets + * between the processors, and guarantees that the maximum + * delay between the cycle counters is never bigger than + * the latency of information-passing (cachelines) between + * two CPUs. + */ + + nslaves = num_online_cpus()-1; + for (i = 0; i < NR_LOOPS; i++) { + /* slaves loop on '!= ncpus' */ + while (atomic_read(&count_count_start) != nslaves) + mb(); + atomic_set(&count_count_stop, 0); + smp_wmb(); + + /* this lets the slaves write their count register */ + atomic_inc(&count_count_start); + + /* + * Everyone initialises count in the last loop: + */ + if (i == NR_LOOPS-1) + write_c0_count(initcount); + + /* + * Wait for all slaves to leave the synchronization point: + */ + while (atomic_read(&count_count_stop) != nslaves) + mb(); + atomic_set(&count_count_start, 0); + smp_wmb(); + atomic_inc(&count_count_stop); + } + /* Arrange for an interrupt in a short while */ + write_c0_compare(read_c0_count() + COUNTON); + + local_irq_restore(flags); + + /* + * i386 code reported the skew here, but the + * count registers were almost certainly out of sync + * so no point in alarming people + */ + printk("done.\n"); +} + +void __init synchronise_count_slave(void) +{ + int i; + unsigned long flags; + unsigned int initcount; + int ncpus; + +#ifdef CONFIG_MIPS_MT_SMTC + /* + * SMTC needs to synchronise per VPE, not per CPU + * ignore for now + */ + return; +#endif + + local_irq_save(flags); + + /* + * Not every cpu is online at the time this gets called, + * so we first wait for the master to say everyone is ready + */ + + while (!atomic_read(&count_start_flag)) + mb(); + + /* Count will be initialised to expirelo for all CPU's */ + initcount = expirelo; + + ncpus = num_online_cpus(); + for (i = 0; i < NR_LOOPS; i++) { + atomic_inc(&count_count_start); + while (atomic_read(&count_count_start) != ncpus) + mb(); + + /* + * Everyone initialises count in the last loop: + */ + if (i == NR_LOOPS-1) + write_c0_count(initcount); + + atomic_inc(&count_count_stop); + while (atomic_read(&count_count_stop) != ncpus) + mb(); + } + /* Arrange for an interrupt in a short while */ + write_c0_compare(read_c0_count() + COUNTON); + + local_irq_restore(flags); +} +#undef NR_LOOPS +#endif diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index b45a7093ca2..1f467d53464 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -38,7 +38,6 @@ int __weak rtc_mips_set_time(unsigned long sec) { return 0; } -EXPORT_SYMBOL(rtc_mips_set_time); int __weak rtc_mips_set_mmss(unsigned long nowtime) { @@ -50,13 +49,11 @@ int update_persistent_clock(struct timespec now) return rtc_mips_set_mmss(now.tv_sec); } -int null_perf_irq(void) +static int null_perf_irq(void) { return 0; } -EXPORT_SYMBOL(null_perf_irq); - int (*perf_irq)(void) = null_perf_irq; EXPORT_SYMBOL(perf_irq); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 984c0d0a7b4..cb8b0e2c795 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -22,6 +22,7 @@ #include <linux/kallsyms.h> #include <linux/bootmem.h> #include <linux/interrupt.h> +#include <linux/ptrace.h> #include <asm/bootinfo.h> #include <asm/branch.h> @@ -80,19 +81,22 @@ void (*board_bind_eic_interrupt)(int irq, int regset); static void show_raw_backtrace(unsigned long reg29) { - unsigned long *sp = (unsigned long *)reg29; + unsigned long *sp = (unsigned long *)(reg29 & ~3); unsigned long addr; printk("Call Trace:"); #ifdef CONFIG_KALLSYMS printk("\n"); #endif - while (!kstack_end(sp)) { - addr = *sp++; - if (__kernel_text_address(addr)) - print_ip_sym(addr); +#define IS_KVA01(a) ((((unsigned int)a) & 0xc0000000) == 0x80000000) + if (IS_KVA01(sp)) { + while (!kstack_end(sp)) { + addr = *sp++; + if (__kernel_text_address(addr)) + print_ip_sym(addr); + } + printk("\n"); } - printk("\n"); } #ifdef CONFIG_KALLSYMS @@ -192,16 +196,19 @@ EXPORT_SYMBOL(dump_stack); static void show_code(unsigned int __user *pc) { long i; + unsigned short __user *pc16 = NULL; printk("\nCode:"); + if ((unsigned long)pc & 1) + pc16 = (unsigned short __user *)((unsigned long)pc & ~1); for(i = -3 ; i < 6 ; i++) { unsigned int insn; - if (__get_user(insn, pc + i)) { + if (pc16 ? __get_user(insn, pc16 + i) : __get_user(insn, pc + i)) { printk(" (Bad address in epc)\n"); break; } - printk("%c%08x%c", (i?' ':'<'), insn, (i?' ':'>')); + printk("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>')); } } @@ -311,10 +318,21 @@ void show_regs(struct pt_regs *regs) void show_registers(const struct pt_regs *regs) { + const int field = 2 * sizeof(unsigned long); + __show_regs(regs); print_modules(); - printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n", - current->comm, task_pid_nr(current), current_thread_info(), current); + printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n", + current->comm, current->pid, current_thread_info(), current, + field, current_thread_info()->tp_value); + if (cpu_has_userlocal) { + unsigned long tls; + + tls = read_c0_userlocal(); + if (tls != current_thread_info()->tp_value) + printk("*HwTLS: %0*lx\n", field, tls); + } + show_stacktrace(current, regs); show_code((unsigned int __user *) regs->cp0_epc); printk("\n"); @@ -657,35 +675,24 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) force_sig_info(SIGFPE, &info, current); } -asmlinkage void do_bp(struct pt_regs *regs) +static void do_trap_or_bp(struct pt_regs *regs, unsigned int code, + const char *str) { - unsigned int opcode, bcode; siginfo_t info; - - if (__get_user(opcode, (unsigned int __user *) exception_epc(regs))) - goto out_sigsegv; - - /* - * There is the ancient bug in the MIPS assemblers that the break - * code starts left to bit 16 instead to bit 6 in the opcode. - * Gas is bug-compatible, but not always, grrr... - * We handle both cases with a simple heuristics. --macro - */ - bcode = ((opcode >> 6) & ((1 << 20) - 1)); - if (bcode < (1 << 10)) - bcode <<= 10; + char b[40]; /* - * (A short test says that IRIX 5.3 sends SIGTRAP for all break - * insns, even for break codes that indicate arithmetic failures. - * Weird ...) + * A short test says that IRIX 5.3 sends SIGTRAP for all trap + * insns, even for trap and break codes that indicate arithmetic + * failures. Weird ... * But should we continue the brokenness??? --macro */ - switch (bcode) { - case BRK_OVERFLOW << 10: - case BRK_DIVZERO << 10: - die_if_kernel("Break instruction in kernel code", regs); - if (bcode == (BRK_DIVZERO << 10)) + switch (code) { + case BRK_OVERFLOW: + case BRK_DIVZERO: + scnprintf(b, sizeof(b), "%s instruction in kernel code", str); + die_if_kernel(b, regs); + if (code == BRK_DIVZERO) info.si_code = FPE_INTDIV; else info.si_code = FPE_INTOVF; @@ -695,12 +702,34 @@ asmlinkage void do_bp(struct pt_regs *regs) force_sig_info(SIGFPE, &info, current); break; case BRK_BUG: - die("Kernel bug detected", regs); + die_if_kernel("Kernel bug detected", regs); + force_sig(SIGTRAP, current); break; default: - die_if_kernel("Break instruction in kernel code", regs); + scnprintf(b, sizeof(b), "%s instruction in kernel code", str); + die_if_kernel(b, regs); force_sig(SIGTRAP, current); } +} + +asmlinkage void do_bp(struct pt_regs *regs) +{ + unsigned int opcode, bcode; + + if (__get_user(opcode, (unsigned int __user *) exception_epc(regs))) + goto out_sigsegv; + + /* + * There is the ancient bug in the MIPS assemblers that the break + * code starts left to bit 16 instead to bit 6 in the opcode. + * Gas is bug-compatible, but not always, grrr... + * We handle both cases with a simple heuristics. --macro + */ + bcode = ((opcode >> 6) & ((1 << 20) - 1)); + if (bcode >= (1 << 10)) + bcode >>= 10; + + do_trap_or_bp(regs, bcode, "Break"); return; out_sigsegv: @@ -710,7 +739,6 @@ out_sigsegv: asmlinkage void do_tr(struct pt_regs *regs) { unsigned int opcode, tcode = 0; - siginfo_t info; if (__get_user(opcode, (unsigned int __user *) exception_epc(regs))) goto out_sigsegv; @@ -719,32 +747,7 @@ asmlinkage void do_tr(struct pt_regs *regs) if (!(opcode & OPCODE)) tcode = ((opcode >> 6) & ((1 << 10) - 1)); - /* - * (A short test says that IRIX 5.3 sends SIGTRAP for all trap - * insns, even for trap codes that indicate arithmetic failures. - * Weird ...) - * But should we continue the brokenness??? --macro - */ - switch (tcode) { - case BRK_OVERFLOW: - case BRK_DIVZERO: - die_if_kernel("Trap instruction in kernel code", regs); - if (tcode == BRK_DIVZERO) - info.si_code = FPE_INTDIV; - else - info.si_code = FPE_INTOVF; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *) regs->cp0_epc; - force_sig_info(SIGFPE, &info, current); - break; - case BRK_BUG: - die("Kernel bug detected", regs); - break; - default: - die_if_kernel("Trap instruction in kernel code", regs); - force_sig(SIGTRAP, current); - } + do_trap_or_bp(regs, tcode, "Trap"); return; out_sigsegv: @@ -985,6 +988,21 @@ asmlinkage void do_reserved(struct pt_regs *regs) (regs->cp0_cause & 0x7f) >> 2); } +static int __initdata l1parity = 1; +static int __init nol1parity(char *s) +{ + l1parity = 0; + return 1; +} +__setup("nol1par", nol1parity); +static int __initdata l2parity = 1; +static int __init nol2parity(char *s) +{ + l2parity = 0; + return 1; +} +__setup("nol2par", nol2parity); + /* * Some MIPS CPUs can enable/disable for cache parity detection, but do * it different ways. @@ -994,6 +1012,62 @@ static inline void parity_protection_init(void) switch (current_cpu_type()) { case CPU_24K: case CPU_34K: + case CPU_74K: + case CPU_1004K: + { +#define ERRCTL_PE 0x80000000 +#define ERRCTL_L2P 0x00800000 + unsigned long errctl; + unsigned int l1parity_present, l2parity_present; + + errctl = read_c0_ecc(); + errctl &= ~(ERRCTL_PE|ERRCTL_L2P); + + /* probe L1 parity support */ + write_c0_ecc(errctl | ERRCTL_PE); + back_to_back_c0_hazard(); + l1parity_present = (read_c0_ecc() & ERRCTL_PE); + + /* probe L2 parity support */ + write_c0_ecc(errctl|ERRCTL_L2P); + back_to_back_c0_hazard(); + l2parity_present = (read_c0_ecc() & ERRCTL_L2P); + + if (l1parity_present && l2parity_present) { + if (l1parity) + errctl |= ERRCTL_PE; + if (l1parity ^ l2parity) + errctl |= ERRCTL_L2P; + } else if (l1parity_present) { + if (l1parity) + errctl |= ERRCTL_PE; + } else if (l2parity_present) { + if (l2parity) + errctl |= ERRCTL_L2P; + } else { + /* No parity available */ + } + + printk(KERN_INFO "Writing ErrCtl register=%08lx\n", errctl); + + write_c0_ecc(errctl); + back_to_back_c0_hazard(); + errctl = read_c0_ecc(); + printk(KERN_INFO "Readback ErrCtl register=%08lx\n", errctl); + + if (l1parity_present) + printk(KERN_INFO "Cache parity protection %sabled\n", + (errctl & ERRCTL_PE) ? "en" : "dis"); + + if (l2parity_present) { + if (l1parity_present && l1parity) + errctl ^= ERRCTL_L2P; + printk(KERN_INFO "L2 cache parity protection %sabled\n", + (errctl & ERRCTL_L2P) ? "en" : "dis"); + } + } + break; + case CPU_5KC: write_c0_ecc(0x80000000); back_to_back_c0_hazard(); @@ -1306,6 +1380,17 @@ int cp0_compare_irq; int cp0_perfcount_irq; EXPORT_SYMBOL_GPL(cp0_perfcount_irq); +static int __cpuinitdata noulri; + +static int __init ulri_disable(char *s) +{ + pr_info("Disabling ulri\n"); + noulri = 1; + + return 1; +} +__setup("noulri", ulri_disable); + void __cpuinit per_cpu_trap_init(void) { unsigned int cpu = smp_processor_id(); @@ -1342,16 +1427,14 @@ void __cpuinit per_cpu_trap_init(void) change_c0_status(ST0_CU|ST0_MX|ST0_RE|ST0_FR|ST0_BEV|ST0_TS|ST0_KX|ST0_SX|ST0_UX, status_set); -#ifdef CONFIG_CPU_MIPSR2 if (cpu_has_mips_r2) { unsigned int enable = 0x0000000f; - if (cpu_has_userlocal) + if (!noulri && cpu_has_userlocal) enable |= (1 << 29); write_c0_hwrena(enable); } -#endif #ifdef CONFIG_MIPS_MT_SMTC if (!secondaryTC) { diff --git a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h index 8977eb585a3..76278653844 100644 --- a/arch/mips/math-emu/ieee754dp.h +++ b/arch/mips/math-emu/ieee754dp.h @@ -46,7 +46,7 @@ #define DPDNORMX DPDNORMx(xm, xe) #define DPDNORMY DPDNORMx(ym, ye) -static __inline ieee754dp builddp(int s, int bx, u64 m) +static inline ieee754dp builddp(int s, int bx, u64 m) { ieee754dp r; diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h index 9917c1e4d94..d9e3586b5bc 100644 --- a/arch/mips/math-emu/ieee754sp.h +++ b/arch/mips/math-emu/ieee754sp.h @@ -51,7 +51,7 @@ #define SPDNORMX SPDNORMx(xm, xe) #define SPDNORMY SPDNORMx(ym, ye) -static __inline ieee754sp buildsp(int s, int bx, unsigned m) +static inline ieee754sp buildsp(int s, int bx, unsigned m) { ieee754sp r; diff --git a/arch/mips/mips-boards/generic/Makefile b/arch/mips/mips-boards/generic/Makefile index b31d8dfed1b..f7f87fc09d1 100644 --- a/arch/mips/mips-boards/generic/Makefile +++ b/arch/mips/mips-boards/generic/Makefile @@ -20,6 +20,7 @@ obj-y := reset.o display.o init.o memory.o \ cmdline.o time.o +obj-y += amon.o obj-$(CONFIG_EARLY_PRINTK) += console.o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/mips/mips-boards/generic/amon.c b/arch/mips/mips-boards/generic/amon.c new file mode 100644 index 00000000000..b7633fda418 --- /dev/null +++ b/arch/mips/mips-boards/generic/amon.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2007 MIPS Technologies, Inc. + * All rights reserved. + + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Arbitrary Monitor interface + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/smp.h> + +#include <asm-mips/addrspace.h> +#include <asm-mips/mips-boards/launch.h> +#include <asm-mips/mipsmtregs.h> + +int amon_cpu_avail(int cpu) +{ + struct cpulaunch *launch = (struct cpulaunch *)KSEG0ADDR(CPULAUNCH); + + if (cpu < 0 || cpu >= NCPULAUNCH) { + pr_debug("avail: cpu%d is out of range\n", cpu); + return 0; + } + + launch += cpu; + if (!(launch->flags & LAUNCH_FREADY)) { + pr_debug("avail: cpu%d is not ready\n", cpu); + return 0; + } + if (launch->flags & (LAUNCH_FGO|LAUNCH_FGONE)) { + pr_debug("avail: too late.. cpu%d is already gone\n", cpu); + return 0; + } + + return 1; +} + +void amon_cpu_start(int cpu, + unsigned long pc, unsigned long sp, + unsigned long gp, unsigned long a0) +{ + volatile struct cpulaunch *launch = + (struct cpulaunch *)KSEG0ADDR(CPULAUNCH); + + if (!amon_cpu_avail(cpu)) + return; + if (cpu == smp_processor_id()) { + pr_debug("launch: I am cpu%d!\n", cpu); + return; + } + launch += cpu; + + pr_debug("launch: starting cpu%d\n", cpu); + + launch->pc = pc; + launch->gp = gp; + launch->sp = sp; + launch->a0 = a0; + + /* Make sure target sees parameters before the go bit */ + smp_mb(); + + launch->flags |= LAUNCH_FGO; + while ((launch->flags & LAUNCH_FGONE) == 0) + ; + pr_debug("launch: cpu%d gone!\n", cpu); +} diff --git a/arch/mips/mips-boards/generic/init.c b/arch/mips/mips-boards/generic/init.c index 1695dca5506..83b9dc73920 100644 --- a/arch/mips/mips-boards/generic/init.c +++ b/arch/mips/mips-boards/generic/init.c @@ -226,7 +226,7 @@ void __init kgdb_config(void) } #endif -void __init mips_nmi_setup(void) +static void __init mips_nmi_setup(void) { void *base; extern char except_vec_nmi; @@ -238,7 +238,7 @@ void __init mips_nmi_setup(void) flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } -void __init mips_ejtag_setup(void) +static void __init mips_ejtag_setup(void) { void *base; extern char except_vec_ejtag_debug; @@ -295,15 +295,21 @@ void __init prom_init(void) break; case MIPS_REVISION_CORID_CORE_MSC: case MIPS_REVISION_CORID_CORE_FPGA2: - case MIPS_REVISION_CORID_CORE_FPGA3: - case MIPS_REVISION_CORID_CORE_FPGA4: case MIPS_REVISION_CORID_CORE_24K: - case MIPS_REVISION_CORID_CORE_EMUL_MSC: + /* + * SOCit/ROCit support is essentially identical + * but make an attempt to distinguish them + */ mips_revision_sconid = MIPS_REVISION_SCON_SOCIT; break; + case MIPS_REVISION_CORID_CORE_FPGA3: + case MIPS_REVISION_CORID_CORE_FPGA4: + case MIPS_REVISION_CORID_CORE_FPGA5: + case MIPS_REVISION_CORID_CORE_EMUL_MSC: default: - mips_display_message("CC Error"); - while (1); /* We die here... */ + /* See above */ + mips_revision_sconid = MIPS_REVISION_SCON_ROCIT; + break; } } @@ -418,6 +424,9 @@ void __init prom_init(void) #ifdef CONFIG_SERIAL_8250_CONSOLE console_config(); #endif +#ifdef CONFIG_MIPS_CMP + register_smp_ops(&cmp_smp_ops); +#endif #ifdef CONFIG_MIPS_MT_SMP register_smp_ops(&vsmp_smp_ops); #endif diff --git a/arch/mips/mips-boards/generic/memory.c b/arch/mips/mips-boards/generic/memory.c index dc272c18823..5e443bba566 100644 --- a/arch/mips/mips-boards/generic/memory.c +++ b/arch/mips/mips-boards/generic/memory.c @@ -37,7 +37,7 @@ enum yamon_memtypes { yamon_prom, yamon_free, }; -struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS]; +static struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS]; #ifdef DEBUG static char *mtypes[3] = { @@ -50,7 +50,7 @@ static char *mtypes[3] = { /* determined physical memory size, not overridden by command line args */ unsigned long physical_memsize = 0L; -struct prom_pmemblock * __init prom_getmdesc(void) +static struct prom_pmemblock * __init prom_getmdesc(void) { char *memsize_str; unsigned int memsize; diff --git a/arch/mips/mips-boards/generic/time.c b/arch/mips/mips-boards/generic/time.c index b50e0fc406a..008fd82b584 100644 --- a/arch/mips/mips-boards/generic/time.c +++ b/arch/mips/mips-boards/generic/time.c @@ -55,16 +55,36 @@ unsigned long cpu_khz; static int mips_cpu_timer_irq; +static int mips_cpu_perf_irq; extern int cp0_perfcount_irq; +DEFINE_PER_CPU(unsigned int, tickcount); +#define tickcount_this_cpu __get_cpu_var(tickcount) +static unsigned long ledbitmask; + static void mips_timer_dispatch(void) { +#if defined(CONFIG_MIPS_MALTA) || defined(CONFIG_MIPS_ATLAS) + /* + * Yes, this is very tacky, won't work as expected with SMTC and + * dyntick will break it, + * but it gives me a nice warm feeling during debug + */ +#define LEDBAR 0xbf000408 + if (tickcount_this_cpu++ >= HZ) { + tickcount_this_cpu = 0; + change_bit(smp_processor_id(), &ledbitmask); + smp_wmb(); /* Make sure every one else sees the change */ + /* This will pick up any recent changes made by other CPU's */ + *(unsigned int *)LEDBAR = ledbitmask; + } +#endif do_IRQ(mips_cpu_timer_irq); } static void mips_perf_dispatch(void) { - do_IRQ(cp0_perfcount_irq); + do_IRQ(mips_cpu_perf_irq); } /* @@ -127,21 +147,20 @@ unsigned long read_persistent_clock(void) return mc146818_get_cmos_time(); } -void __init plat_perf_setup(void) +static void __init plat_perf_setup(void) { - cp0_perfcount_irq = -1; - #ifdef MSC01E_INT_BASE if (cpu_has_veic) { set_vi_handler(MSC01E_INT_PERFCTR, mips_perf_dispatch); - cp0_perfcount_irq = MSC01E_INT_BASE + MSC01E_INT_PERFCTR; + mips_cpu_perf_irq = MSC01E_INT_BASE + MSC01E_INT_PERFCTR; } else #endif if (cp0_perfcount_irq >= 0) { if (cpu_has_vint) set_vi_handler(cp0_perfcount_irq, mips_perf_dispatch); + mips_cpu_perf_irq = MIPS_CPU_IRQ_BASE + cp0_perfcount_irq; #ifdef CONFIG_SMP - set_irq_handler(cp0_perfcount_irq, handle_percpu_irq); + set_irq_handler(mips_cpu_perf_irq, handle_percpu_irq); #endif } } diff --git a/arch/mips/mips-boards/malta/Makefile b/arch/mips/mips-boards/malta/Makefile index 931ca4600a6..8dc6e2ac4c0 100644 --- a/arch/mips/mips-boards/malta/Makefile +++ b/arch/mips/mips-boards/malta/Makefile @@ -22,6 +22,7 @@ obj-y := malta_int.o malta_platform.o malta_setup.o obj-$(CONFIG_MTD) += malta_mtd.o +# FIXME FIXME FIXME obj-$(CONFIG_MIPS_MT_SMTC) += malta_smtc.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/mips-boards/malta/malta_int.c b/arch/mips/mips-boards/malta/malta_int.c index dbe60eb55e2..8c495104b32 100644 --- a/arch/mips/mips-boards/malta/malta_int.c +++ b/arch/mips/mips-boards/malta/malta_int.c @@ -31,6 +31,7 @@ #include <linux/kernel.h> #include <linux/random.h> +#include <asm/traps.h> #include <asm/i8259.h> #include <asm/irq_cpu.h> #include <asm/irq_regs.h> @@ -41,6 +42,14 @@ #include <asm/mips-boards/generic.h> #include <asm/mips-boards/msc01_pci.h> #include <asm/msc01_ic.h> +#include <asm/gic.h> +#include <asm/gcmpregs.h> + +int gcmp_present = -1; +int gic_present; +static unsigned long _msc01_biu_base; +static unsigned long _gcmp_base; +static unsigned int ipi_map[NR_CPUS]; static DEFINE_SPINLOCK(mips_irq_lock); @@ -121,6 +130,17 @@ static void malta_hw0_irqdispatch(void) do_IRQ(MALTA_INT_BASE + irq); } +static void malta_ipi_irqdispatch(void) +{ + int irq; + + irq = gic_get_int(); + if (irq < 0) + return; /* interrupt has already been cleared */ + + do_IRQ(MIPS_GIC_IRQ_BASE + irq); +} + static void corehi_irqdispatch(void) { unsigned int intedge, intsteer, pcicmd, pcibadaddr; @@ -257,12 +277,61 @@ asmlinkage void plat_irq_dispatch(void) if (irq == MIPSCPU_INT_I8259A) malta_hw0_irqdispatch(); + else if (gic_present && ((1 << irq) & ipi_map[smp_processor_id()])) + malta_ipi_irqdispatch(); else if (irq >= 0) do_IRQ(MIPS_CPU_IRQ_BASE + irq); else spurious_interrupt(); } +#ifdef CONFIG_MIPS_MT_SMP + + +#define GIC_MIPS_CPU_IPI_RESCHED_IRQ 3 +#define GIC_MIPS_CPU_IPI_CALL_IRQ 4 + +#define MIPS_CPU_IPI_RESCHED_IRQ 0 /* SW int 0 for resched */ +#define C_RESCHED C_SW0 +#define MIPS_CPU_IPI_CALL_IRQ 1 /* SW int 1 for resched */ +#define C_CALL C_SW1 +static int cpu_ipi_resched_irq, cpu_ipi_call_irq; + +static void ipi_resched_dispatch(void) +{ + do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ); +} + +static void ipi_call_dispatch(void) +{ + do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ); +} + +static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) +{ + return IRQ_HANDLED; +} + +static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) +{ + smp_call_function_interrupt(); + + return IRQ_HANDLED; +} + +static struct irqaction irq_resched = { + .handler = ipi_resched_interrupt, + .flags = IRQF_DISABLED|IRQF_PERCPU, + .name = "IPI_resched" +}; + +static struct irqaction irq_call = { + .handler = ipi_call_interrupt, + .flags = IRQF_DISABLED|IRQF_PERCPU, + .name = "IPI_call" +}; +#endif /* CONFIG_MIPS_MT_SMP */ + static struct irqaction i8259irq = { .handler = no_action, .name = "XT-PIC cascade" @@ -273,13 +342,13 @@ static struct irqaction corehi_irqaction = { .name = "CoreHi" }; -msc_irqmap_t __initdata msc_irqmap[] = { +static msc_irqmap_t __initdata msc_irqmap[] = { {MSC01C_INT_TMR, MSC01_IRQ_EDGE, 0}, {MSC01C_INT_PCI, MSC01_IRQ_LEVEL, 0}, }; -int __initdata msc_nr_irqs = ARRAY_SIZE(msc_irqmap); +static int __initdata msc_nr_irqs = ARRAY_SIZE(msc_irqmap); -msc_irqmap_t __initdata msc_eicirqmap[] = { +static msc_irqmap_t __initdata msc_eicirqmap[] = { {MSC01E_INT_SW0, MSC01_IRQ_LEVEL, 0}, {MSC01E_INT_SW1, MSC01_IRQ_LEVEL, 0}, {MSC01E_INT_I8259A, MSC01_IRQ_LEVEL, 0}, @@ -291,15 +360,90 @@ msc_irqmap_t __initdata msc_eicirqmap[] = { {MSC01E_INT_PERFCTR, MSC01_IRQ_LEVEL, 0}, {MSC01E_INT_CPUCTR, MSC01_IRQ_LEVEL, 0} }; -int __initdata msc_nr_eicirqs = ARRAY_SIZE(msc_eicirqmap); + +static int __initdata msc_nr_eicirqs = ARRAY_SIZE(msc_eicirqmap); + +/* + * This GIC specific tabular array defines the association between External + * Interrupts and CPUs/Core Interrupts. The nature of the External + * Interrupts is also defined here - polarity/trigger. + */ +static struct gic_intr_map gic_intr_map[] = { + { GIC_EXT_INTR(0), X, X, X, X, 0 }, + { GIC_EXT_INTR(1), X, X, X, X, 0 }, + { GIC_EXT_INTR(2), X, X, X, X, 0 }, + { GIC_EXT_INTR(3), 0, GIC_CPU_INT0, GIC_POL_POS, GIC_TRIG_LEVEL, 0 }, + { GIC_EXT_INTR(4), 0, GIC_CPU_INT1, GIC_POL_POS, GIC_TRIG_LEVEL, 0 }, + { GIC_EXT_INTR(5), 0, GIC_CPU_INT2, GIC_POL_POS, GIC_TRIG_LEVEL, 0 }, + { GIC_EXT_INTR(6), 0, GIC_CPU_INT3, GIC_POL_POS, GIC_TRIG_LEVEL, 0 }, + { GIC_EXT_INTR(7), 0, GIC_CPU_INT4, GIC_POL_POS, GIC_TRIG_LEVEL, 0 }, + { GIC_EXT_INTR(8), 0, GIC_CPU_INT3, GIC_POL_POS, GIC_TRIG_LEVEL, 0 }, + { GIC_EXT_INTR(9), 0, GIC_CPU_INT3, GIC_POL_POS, GIC_TRIG_LEVEL, 0 }, + { GIC_EXT_INTR(10), X, X, X, X, 0 }, + { GIC_EXT_INTR(11), X, X, X, X, 0 }, + { GIC_EXT_INTR(12), 0, GIC_CPU_INT3, GIC_POL_POS, GIC_TRIG_LEVEL, 0 }, + { GIC_EXT_INTR(13), 0, GIC_MAP_TO_NMI_MSK, GIC_POL_POS, GIC_TRIG_LEVEL, 0 }, + { GIC_EXT_INTR(14), 0, GIC_MAP_TO_NMI_MSK, GIC_POL_POS, GIC_TRIG_LEVEL, 0 }, + { GIC_EXT_INTR(15), X, X, X, X, 0 }, + { GIC_EXT_INTR(16), 0, GIC_CPU_INT1, GIC_POL_POS, GIC_TRIG_EDGE, 1 }, + { GIC_EXT_INTR(17), 0, GIC_CPU_INT2, GIC_POL_POS, GIC_TRIG_EDGE, 1 }, + { GIC_EXT_INTR(18), 1, GIC_CPU_INT1, GIC_POL_POS, GIC_TRIG_EDGE, 1 }, + { GIC_EXT_INTR(19), 1, GIC_CPU_INT2, GIC_POL_POS, GIC_TRIG_EDGE, 1 }, + { GIC_EXT_INTR(20), 2, GIC_CPU_INT1, GIC_POL_POS, GIC_TRIG_EDGE, 1 }, + { GIC_EXT_INTR(21), 2, GIC_CPU_INT2, GIC_POL_POS, GIC_TRIG_EDGE, 1 }, + { GIC_EXT_INTR(22), 3, GIC_CPU_INT1, GIC_POL_POS, GIC_TRIG_EDGE, 1 }, + { GIC_EXT_INTR(23), 3, GIC_CPU_INT2, GIC_POL_POS, GIC_TRIG_EDGE, 1 }, +}; + +/* + * GCMP needs to be detected before any SMP initialisation + */ +int __init gcmp_probe(unsigned long addr, unsigned long size) +{ + if (gcmp_present >= 0) + return gcmp_present; + + _gcmp_base = (unsigned long) ioremap_nocache(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ); + _msc01_biu_base = (unsigned long) ioremap_nocache(MSC01_BIU_REG_BASE, MSC01_BIU_ADDRSPACE_SZ); + gcmp_present = (GCMPGCB(GCMPB) & GCMP_GCB_GCMPB_GCMPBASE_MSK) == GCMP_BASE_ADDR; + + if (gcmp_present) + printk(KERN_DEBUG "GCMP present\n"); + return gcmp_present; +} + +void __init fill_ipi_map(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(gic_intr_map); i++) { + if (gic_intr_map[i].ipiflag && (gic_intr_map[i].cpunum != X)) + ipi_map[gic_intr_map[i].cpunum] |= + (1 << (gic_intr_map[i].pin + 2)); + } +} void __init arch_init_irq(void) { + int gic_present, gcmp_present; + init_i8259_irqs(); if (!cpu_has_veic) mips_cpu_irq_init(); + gcmp_present = gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ); + if (gcmp_present) { + GCMPGCB(GICBA) = GIC_BASE_ADDR | GCMP_GCB_GICBA_EN_MSK; + gic_present = 1; + } else { + _msc01_biu_base = (unsigned long) ioremap_nocache(MSC01_BIU_REG_BASE, MSC01_BIU_ADDRSPACE_SZ); + gic_present = (REG(_msc01_biu_base, MSC01_SC_CFG) & + MSC01_SC_CFG_GICPRES_MSK) >> MSC01_SC_CFG_GICPRES_SHF; + } + if (gic_present) + printk(KERN_DEBUG "GIC present\n"); + switch (mips_revision_sconid) { case MIPS_REVISION_SCON_SOCIT: case MIPS_REVISION_SCON_ROCIT: @@ -360,4 +504,206 @@ void __init arch_init_irq(void) setup_irq(MIPS_CPU_IRQ_BASE+MIPSCPU_INT_COREHI, &corehi_irqaction); } + +#if defined(CONFIG_MIPS_MT_SMP) + if (gic_present) { + /* FIXME */ + int i; + struct { + unsigned int resched; + unsigned int call; + } ipiirq[] = { + { + .resched = GIC_IPI_EXT_INTR_RESCHED_VPE0, + .call = GIC_IPI_EXT_INTR_CALLFNC_VPE0}, + { + .resched = GIC_IPI_EXT_INTR_RESCHED_VPE1, + .call = GIC_IPI_EXT_INTR_CALLFNC_VPE1 + }, { + .resched = GIC_IPI_EXT_INTR_RESCHED_VPE2, + .call = GIC_IPI_EXT_INTR_CALLFNC_VPE2 + }, { + .resched = GIC_IPI_EXT_INTR_RESCHED_VPE3, + .call = GIC_IPI_EXT_INTR_CALLFNC_VPE3 + } + }; +#define NIPI (sizeof(ipiirq)/sizeof(ipiirq[0])) + fill_ipi_map(); + gic_init(GIC_BASE_ADDR, GIC_ADDRSPACE_SZ, gic_intr_map, ARRAY_SIZE(gic_intr_map), MIPS_GIC_IRQ_BASE); + if (!gcmp_present) { + /* Enable the GIC */ + i = REG(_msc01_biu_base, MSC01_SC_CFG); + REG(_msc01_biu_base, MSC01_SC_CFG) = + (i | (0x1 << MSC01_SC_CFG_GICENA_SHF)); + pr_debug("GIC Enabled\n"); + } + + /* set up ipi interrupts */ + if (cpu_has_vint) { + set_vi_handler(MIPSCPU_INT_IPI0, malta_ipi_irqdispatch); + set_vi_handler(MIPSCPU_INT_IPI1, malta_ipi_irqdispatch); + } + /* Argh.. this really needs sorting out.. */ + printk("CPU%d: status register was %08x\n", smp_processor_id(), read_c0_status()); + write_c0_status(read_c0_status() | STATUSF_IP3 | STATUSF_IP4); + printk("CPU%d: status register now %08x\n", smp_processor_id(), read_c0_status()); + write_c0_status(0x1100dc00); + printk("CPU%d: status register frc %08x\n", smp_processor_id(), read_c0_status()); + for (i = 0; i < NIPI; i++) { + setup_irq(MIPS_GIC_IRQ_BASE + ipiirq[i].resched, &irq_resched); + setup_irq(MIPS_GIC_IRQ_BASE + ipiirq[i].call, &irq_call); + + set_irq_handler(MIPS_GIC_IRQ_BASE + ipiirq[i].resched, handle_percpu_irq); + set_irq_handler(MIPS_GIC_IRQ_BASE + ipiirq[i].call, handle_percpu_irq); + } + } else { + /* set up ipi interrupts */ + if (cpu_has_veic) { + set_vi_handler (MSC01E_INT_SW0, ipi_resched_dispatch); + set_vi_handler (MSC01E_INT_SW1, ipi_call_dispatch); + cpu_ipi_resched_irq = MSC01E_INT_SW0; + cpu_ipi_call_irq = MSC01E_INT_SW1; + } else { + if (cpu_has_vint) { + set_vi_handler (MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch); + set_vi_handler (MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch); + } + cpu_ipi_resched_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ; + cpu_ipi_call_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ; + } + + setup_irq(cpu_ipi_resched_irq, &irq_resched); + setup_irq(cpu_ipi_call_irq, &irq_call); + + set_irq_handler(cpu_ipi_resched_irq, handle_percpu_irq); + set_irq_handler(cpu_ipi_call_irq, handle_percpu_irq); + } +#endif +} + +void malta_be_init(void) +{ + if (gcmp_present) { + /* Could change CM error mask register */ + } +} + + +static char *tr[8] = { + "mem", "gcr", "gic", "mmio", + "0x04", "0x05", "0x06", "0x07" +}; + +static char *mcmd[32] = { + [0x00] = "0x00", + [0x01] = "Legacy Write", + [0x02] = "Legacy Read", + [0x03] = "0x03", + [0x04] = "0x04", + [0x05] = "0x05", + [0x06] = "0x06", + [0x07] = "0x07", + [0x08] = "Coherent Read Own", + [0x09] = "Coherent Read Share", + [0x0a] = "Coherent Read Discard", + [0x0b] = "Coherent Ready Share Always", + [0x0c] = "Coherent Upgrade", + [0x0d] = "Coherent Writeback", + [0x0e] = "0x0e", + [0x0f] = "0x0f", + [0x10] = "Coherent Copyback", + [0x11] = "Coherent Copyback Invalidate", + [0x12] = "Coherent Invalidate", + [0x13] = "Coherent Write Invalidate", + [0x14] = "Coherent Completion Sync", + [0x15] = "0x15", + [0x16] = "0x16", + [0x17] = "0x17", + [0x18] = "0x18", + [0x19] = "0x19", + [0x1a] = "0x1a", + [0x1b] = "0x1b", + [0x1c] = "0x1c", + [0x1d] = "0x1d", + [0x1e] = "0x1e", + [0x1f] = "0x1f" +}; + +static char *core[8] = { + "Invalid/OK", "Invalid/Data", + "Shared/OK", "Shared/Data", + "Modified/OK", "Modified/Data", + "Exclusive/OK", "Exclusive/Data" +}; + +static char *causes[32] = { + "None", "GC_WR_ERR", "GC_RD_ERR", "COH_WR_ERR", + "COH_RD_ERR", "MMIO_WR_ERR", "MMIO_RD_ERR", "0x07", + "0x08", "0x09", "0x0a", "0x0b", + "0x0c", "0x0d", "0x0e", "0x0f", + "0x10", "0x11", "0x12", "0x13", + "0x14", "0x15", "0x16", "INTVN_WR_ERR", + "INTVN_RD_ERR", "0x19", "0x1a", "0x1b", + "0x1c", "0x1d", "0x1e", "0x1f" +}; + +int malta_be_handler(struct pt_regs *regs, int is_fixup) +{ + /* This duplicates the handling in do_be which seems wrong */ + int retval = is_fixup ? MIPS_BE_FIXUP : MIPS_BE_FATAL; + + if (gcmp_present) { + unsigned long cm_error = GCMPGCB(GCMEC); + unsigned long cm_addr = GCMPGCB(GCMEA); + unsigned long cm_other = GCMPGCB(GCMEO); + unsigned long cause, ocause; + char buf[256]; + + cause = (cm_error & GCMP_GCB_GMEC_ERROR_TYPE_MSK); + if (cause != 0) { + cause >>= GCMP_GCB_GMEC_ERROR_TYPE_SHF; + if (cause < 16) { + unsigned long cca_bits = (cm_error >> 15) & 7; + unsigned long tr_bits = (cm_error >> 12) & 7; + unsigned long mcmd_bits = (cm_error >> 7) & 0x1f; + unsigned long stag_bits = (cm_error >> 3) & 15; + unsigned long sport_bits = (cm_error >> 0) & 7; + + snprintf(buf, sizeof(buf), + "CCA=%lu TR=%s MCmd=%s STag=%lu " + "SPort=%lu\n", + cca_bits, tr[tr_bits], mcmd[mcmd_bits], + stag_bits, sport_bits); + } else { + /* glob state & sresp together */ + unsigned long c3_bits = (cm_error >> 18) & 7; + unsigned long c2_bits = (cm_error >> 15) & 7; + unsigned long c1_bits = (cm_error >> 12) & 7; + unsigned long c0_bits = (cm_error >> 9) & 7; + unsigned long sc_bit = (cm_error >> 8) & 1; + unsigned long mcmd_bits = (cm_error >> 3) & 0x1f; + unsigned long sport_bits = (cm_error >> 0) & 7; + snprintf(buf, sizeof(buf), + "C3=%s C2=%s C1=%s C0=%s SC=%s " + "MCmd=%s SPort=%lu\n", + core[c3_bits], core[c2_bits], + core[c1_bits], core[c0_bits], + sc_bit ? "True" : "False", + mcmd[mcmd_bits], sport_bits); + } + + ocause = (cm_other & GCMP_GCB_GMEO_ERROR_2ND_MSK) >> + GCMP_GCB_GMEO_ERROR_2ND_SHF; + + printk("CM_ERROR=%08lx %s <%s>\n", cm_error, + causes[cause], buf); + printk("CM_ADDR =%08lx\n", cm_addr); + printk("CM_OTHER=%08lx %s\n", cm_other, causes[ocause]); + + /* reprime cause register */ + GCMPGCB(GCMEC) = 0; + } + } + + return retval; } diff --git a/arch/mips/mips-boards/malta/malta_setup.c b/arch/mips/mips-boards/malta/malta_setup.c index 2cd8f5734b3..e7cad54936c 100644 --- a/arch/mips/mips-boards/malta/malta_setup.c +++ b/arch/mips/mips-boards/malta/malta_setup.c @@ -1,7 +1,7 @@ /* * Carsten Langgaard, carstenl@mips.com * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. - * Copyright (C) Dmitri Vorobiev + * Copyright (C) 2008 Dmitri Vorobiev * * This program is free software; you can distribute it and/or modify it * under the terms of the GNU General Public License (Version 2) as @@ -36,7 +36,10 @@ #include <linux/console.h> #endif -struct resource standard_io_resources[] = { +extern void malta_be_init(void); +extern int malta_be_handler(struct pt_regs *regs, int is_fixup); + +static struct resource standard_io_resources[] = { { .name = "dma1", .start = 0x00, @@ -220,4 +223,7 @@ void __init plat_mem_setup(void) screen_info_setup(); #endif mips_reboot_setup(); + + board_be_init = malta_be_init; + board_be_handler = malta_be_handler; } diff --git a/arch/mips/mipssim/sim_setup.c b/arch/mips/mipssim/sim_setup.c index d49fe73426b..7c7148ef264 100644 --- a/arch/mips/mipssim/sim_setup.c +++ b/arch/mips/mipssim/sim_setup.c @@ -39,9 +39,6 @@ static void __init serial_init(void); unsigned int _isbonito = 0; -extern void __init sanitize_tlb_entries(void); - - const char *get_system_type(void) { return "MIPSsim"; @@ -55,9 +52,6 @@ void __init plat_mem_setup(void) pr_info("Linux started...\n"); -#ifdef CONFIG_MIPS_MT_SMP - sanitize_tlb_entries(); -#endif } extern struct plat_smp_ops ssmtc_smp_ops; diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index c6f832e0f41..48731020ca0 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -4,30 +4,29 @@ obj-y += cache.o dma-default.o extable.o fault.o \ init.o pgtable.o tlbex.o tlbex-fault.o \ - uasm.o + uasm.o page.o obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o obj-$(CONFIG_64BIT) += pgtable-64.o obj-$(CONFIG_HIGHMEM) += highmem.o -obj-$(CONFIG_CPU_LOONGSON2) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_MIPS32) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_MIPS64) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_NEVADA) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_R10000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_R3000) += c-r3k.o tlb-r3k.o pg-r4k.o -obj-$(CONFIG_CPU_R4300) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_R4X00) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_R5000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_R5432) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_R8000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r8k.o -obj-$(CONFIG_CPU_RM7000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_RM9000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_SB1) += c-r4k.o cerr-sb1.o cex-sb1.o pg-sb1.o \ - tlb-r4k.o -obj-$(CONFIG_CPU_TX39XX) += c-tx39.o pg-r4k.o tlb-r3k.o -obj-$(CONFIG_CPU_TX49XX) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_VR41XX) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_LOONGSON2) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_MIPS32) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_MIPS64) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_NEVADA) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_R10000) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_R3000) += c-r3k.o tlb-r3k.o +obj-$(CONFIG_CPU_R4300) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_R4X00) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_R5000) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_R5432) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_R8000) += c-r4k.o cex-gen.o tlb-r8k.o +obj-$(CONFIG_CPU_RM7000) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_RM9000) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_SB1) += c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o +obj-$(CONFIG_CPU_TX39XX) += c-tx39.o tlb-r3k.o +obj-$(CONFIG_CPU_TX49XX) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_VR41XX) += c-r4k.o cex-gen.o tlb-r4k.o obj-$(CONFIG_IP22_CPU_SCACHE) += sc-ip22.o obj-$(CONFIG_R5000_CPU_SCACHE) += sc-r5k.o diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 77aefb4ebed..643c8bcffff 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -14,6 +14,7 @@ #include <linux/linkage.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/module.h> #include <linux/bitops.h> #include <asm/bcache.h> @@ -53,6 +54,12 @@ static inline void r4k_on_each_cpu(void (*func) (void *info), void *info, preempt_enable(); } +#if defined(CONFIG_MIPS_CMP) +#define cpu_has_safe_index_cacheops 0 +#else +#define cpu_has_safe_index_cacheops 1 +#endif + /* * Must die. */ @@ -481,6 +488,8 @@ static inline void local_r4k_flush_cache_page(void *args) if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) { r4k_blast_dcache_page(addr); + if (exec && !cpu_icache_snoops_remote_store) + r4k_blast_scache_page(addr); } if (exec) { if (vaddr && cpu_has_vtag_icache && mm == current->active_mm) { @@ -583,7 +592,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) * subset property so we have to flush the primary caches * explicitly */ - if (size >= dcache_size) { + if (cpu_has_safe_index_cacheops && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; @@ -606,7 +615,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) return; } - if (size >= dcache_size) { + if (cpu_has_safe_index_cacheops && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; @@ -968,6 +977,7 @@ static void __cpuinit probe_pcache(void) case CPU_24K: case CPU_34K: case CPU_74K: + case CPU_1004K: if ((read_c0_config7() & (1 << 16))) { /* effectively physically indexed dcache, thus no virtual aliases. */ @@ -1216,9 +1226,25 @@ void au1x00_fixup_config_od(void) } } +static int __cpuinitdata cca = -1; + +static int __init cca_setup(char *str) +{ + get_option(&str, &cca); + + return 1; +} + +__setup("cca=", cca_setup); + static void __cpuinit coherency_setup(void) { - change_c0_config(CONF_CM_CMASK, CONF_CM_DEFAULT); + if (cca < 0 || cca > 7) + cca = read_c0_config() & CONF_CM_CMASK; + _page_cachable_default = cca << _CACHE_SHIFT; + + pr_debug("Using cache attribute %d\n", cca); + change_c0_config(CONF_CM_CMASK, cca); /* * c0_status.cu=0 specifies that updates by the sc instruction use @@ -1248,6 +1274,20 @@ static void __cpuinit coherency_setup(void) } } +#if defined(CONFIG_DMA_NONCOHERENT) + +static int __cpuinitdata coherentio; + +static int __init setcoherentio(char *str) +{ + coherentio = 1; + + return 1; +} + +__setup("coherentio", setcoherentio); +#endif + void __cpuinit r4k_cache_init(void) { extern void build_clear_page(void); @@ -1307,14 +1347,22 @@ void __cpuinit r4k_cache_init(void) flush_data_cache_page = r4k_flush_data_cache_page; flush_icache_range = r4k_flush_icache_range; -#ifdef CONFIG_DMA_NONCOHERENT - _dma_cache_wback_inv = r4k_dma_cache_wback_inv; - _dma_cache_wback = r4k_dma_cache_wback_inv; - _dma_cache_inv = r4k_dma_cache_inv; +#if defined(CONFIG_DMA_NONCOHERENT) + if (coherentio) { + _dma_cache_wback_inv = (void *)cache_noop; + _dma_cache_wback = (void *)cache_noop; + _dma_cache_inv = (void *)cache_noop; + } else { + _dma_cache_wback_inv = r4k_dma_cache_wback_inv; + _dma_cache_wback = r4k_dma_cache_wback_inv; + _dma_cache_inv = r4k_dma_cache_inv; + } #endif build_clear_page(); build_copy_page(); +#if !defined(CONFIG_MIPS_CMP) local_r4k___flush_cache_all(NULL); +#endif coherency_setup(); } diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index f5903679ee6..034e8506f6e 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -130,8 +130,28 @@ void __update_cache(struct vm_area_struct *vma, unsigned long address, } } -static char cache_panic[] __cpuinitdata = - "Yeee, unsupported cache architecture."; +unsigned long _page_cachable_default; +EXPORT_SYMBOL_GPL(_page_cachable_default); + +static inline void setup_protection_map(void) +{ + protection_map[0] = PAGE_NONE; + protection_map[1] = PAGE_READONLY; + protection_map[2] = PAGE_COPY; + protection_map[3] = PAGE_COPY; + protection_map[4] = PAGE_READONLY; + protection_map[5] = PAGE_READONLY; + protection_map[6] = PAGE_COPY; + protection_map[7] = PAGE_COPY; + protection_map[8] = PAGE_NONE; + protection_map[9] = PAGE_READONLY; + protection_map[10] = PAGE_SHARED; + protection_map[11] = PAGE_SHARED; + protection_map[12] = PAGE_READONLY; + protection_map[13] = PAGE_READONLY; + protection_map[14] = PAGE_SHARED; + protection_map[15] = PAGE_SHARED; +} void __devinit cpu_cache_init(void) { @@ -139,34 +159,29 @@ void __devinit cpu_cache_init(void) extern void __weak r3k_cache_init(void); r3k_cache_init(); - return; } if (cpu_has_6k_cache) { extern void __weak r6k_cache_init(void); r6k_cache_init(); - return; } if (cpu_has_4k_cache) { extern void __weak r4k_cache_init(void); r4k_cache_init(); - return; } if (cpu_has_8k_cache) { extern void __weak r8k_cache_init(void); r8k_cache_init(); - return; } if (cpu_has_tx39_cache) { extern void __weak tx39_cache_init(void); tx39_cache_init(); - return; } - panic(cache_panic); + setup_protection_map(); } int __weak __uncached_access(struct file *file, unsigned long addr) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index c7aed133d11..ecd562d2c34 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -142,7 +142,7 @@ void *kmap_coherent(struct page *page, unsigned long addr) #endif vaddr = __fix_to_virt(FIX_CMAP_END - idx); pte = mk_pte(page, PAGE_KERNEL); -#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1) +#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) entrylo = pte.pte_high; #else entrylo = pte_val(pte) >> 6; @@ -221,7 +221,7 @@ void copy_user_highpage(struct page *to, struct page *from, copy_page(vto, vfrom); kunmap_atomic(vfrom, KM_USER0); } - if (((vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc) || + if ((!cpu_has_ic_fills_f_dc) || pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK)) flush_data_cache_page((unsigned long)vto); kunmap_atomic(vto, KM_USER1); @@ -229,8 +229,6 @@ void copy_user_highpage(struct page *to, struct page *from, smp_wmb(); } -EXPORT_SYMBOL(copy_user_highpage); - void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len) @@ -249,8 +247,6 @@ void copy_to_user_page(struct vm_area_struct *vma, flush_cache_page(vma, vaddr, page_to_pfn(page)); } -EXPORT_SYMBOL(copy_to_user_page); - void copy_from_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len) @@ -267,9 +263,6 @@ void copy_from_user_page(struct vm_area_struct *vma, } } -EXPORT_SYMBOL(copy_from_user_page); - - #ifdef CONFIG_HIGHMEM unsigned long highstart_pfn, highend_pfn; diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c new file mode 100644 index 00000000000..d827d614436 --- /dev/null +++ b/arch/mips/mm/page.c @@ -0,0 +1,684 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2007 Maciej W. Rozycki + * Copyright (C) 2008 Thiemo Seufer + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/proc_fs.h> + +#include <asm/bugs.h> +#include <asm/cacheops.h> +#include <asm/inst.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/prefetch.h> +#include <asm/system.h> +#include <asm/bootinfo.h> +#include <asm/mipsregs.h> +#include <asm/mmu_context.h> +#include <asm/cpu.h> +#include <asm/war.h> + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +#include <asm/sibyte/sb1250.h> +#include <asm/sibyte/sb1250_regs.h> +#include <asm/sibyte/sb1250_dma.h> +#endif + +#include "uasm.h" + +/* Registers used in the assembled routines. */ +#define ZERO 0 +#define AT 2 +#define A0 4 +#define A1 5 +#define A2 6 +#define T0 8 +#define T1 9 +#define T2 10 +#define T3 11 +#define T9 25 +#define RA 31 + +/* Handle labels (which must be positive integers). */ +enum label_id { + label_clear_nopref = 1, + label_clear_pref, + label_copy_nopref, + label_copy_pref_both, + label_copy_pref_store, +}; + +UASM_L_LA(_clear_nopref) +UASM_L_LA(_clear_pref) +UASM_L_LA(_copy_nopref) +UASM_L_LA(_copy_pref_both) +UASM_L_LA(_copy_pref_store) + +/* We need one branch and therefore one relocation per target label. */ +static struct uasm_label __cpuinitdata labels[5]; +static struct uasm_reloc __cpuinitdata relocs[5]; + +#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010) +#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020) + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x058 bytes + * R4600 v1.7: 0x05c bytes + * R4600 v2.0: 0x060 bytes + * With prefetching, 16 word strides 0x120 bytes + */ + +static u32 clear_page_array[0x120 / 4]; + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +void clear_page_cpu(void *page) __attribute__((alias("clear_page_array"))); +#else +void clear_page(void *page) __attribute__((alias("clear_page_array"))); +#endif + +EXPORT_SYMBOL(clear_page); + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x11c bytes + * R4600 v1.7: 0x080 bytes + * R4600 v2.0: 0x07c bytes + * With prefetching, 16 word strides 0x540 bytes + */ +static u32 copy_page_array[0x540 / 4]; + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +void +copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array"))); +#else +void copy_page(void *to, void *from) __attribute__((alias("copy_page_array"))); +#endif + +EXPORT_SYMBOL(copy_page); + + +static int pref_bias_clear_store __cpuinitdata; +static int pref_bias_copy_load __cpuinitdata; +static int pref_bias_copy_store __cpuinitdata; + +static u32 pref_src_mode __cpuinitdata; +static u32 pref_dst_mode __cpuinitdata; + +static int clear_word_size __cpuinitdata; +static int copy_word_size __cpuinitdata; + +static int half_clear_loop_size __cpuinitdata; +static int half_copy_loop_size __cpuinitdata; + +static int cache_line_size __cpuinitdata; +#define cache_line_mask() (cache_line_size - 1) + +static inline void __cpuinit +pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off) +{ + if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) { + if (off > 0x7fff) { + uasm_i_lui(buf, T9, uasm_rel_hi(off)); + uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); + } else + uasm_i_addiu(buf, T9, ZERO, off); + uasm_i_daddu(buf, reg1, reg2, T9); + } else { + if (off > 0x7fff) { + uasm_i_lui(buf, T9, uasm_rel_hi(off)); + uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); + UASM_i_ADDU(buf, reg1, reg2, T9); + } else + UASM_i_ADDIU(buf, reg1, reg2, off); + } +} + +static void __cpuinit set_prefetch_parameters(void) +{ + if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) + clear_word_size = 8; + else + clear_word_size = 4; + + if (cpu_has_64bit_gp_regs) + copy_word_size = 8; + else + copy_word_size = 4; + + /* + * The pref's used here are using "streaming" hints, which cause the + * copied data to be kicked out of the cache sooner. A page copy often + * ends up copying a lot more data than is commonly used, so this seems + * to make sense in terms of reducing cache pollution, but I've no real + * performance data to back this up. + */ + if (cpu_has_prefetch) { + /* + * XXX: Most prefetch bias values in here are based on + * guesswork. + */ + cache_line_size = cpu_dcache_line_size(); + switch (current_cpu_type()) { + case CPU_TX49XX: + /* TX49 supports only Pref_Load */ + pref_bias_copy_load = 256; + break; + + case CPU_RM9000: + /* + * As a workaround for erratum G105 which make the + * PrepareForStore hint unusable we fall back to + * StoreRetained on the RM9000. Once it is known which + * versions of the RM9000 we'll be able to condition- + * alize this. + */ + + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: + /* + * Those values have been experimentally tuned for an + * Origin 200. + */ + pref_bias_clear_store = 512; + pref_bias_copy_load = 256; + pref_bias_copy_store = 256; + pref_src_mode = Pref_LoadStreamed; + pref_dst_mode = Pref_StoreStreamed; + break; + + case CPU_SB1: + case CPU_SB1A: + pref_bias_clear_store = 128; + pref_bias_copy_load = 128; + pref_bias_copy_store = 128; + /* + * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed + * hints are broken. + */ + if (current_cpu_type() == CPU_SB1 && + (current_cpu_data.processor_id & 0xff) < 0x02) { + pref_src_mode = Pref_Load; + pref_dst_mode = Pref_Store; + } else { + pref_src_mode = Pref_LoadStreamed; + pref_dst_mode = Pref_StoreStreamed; + } + break; + + default: + pref_bias_clear_store = 128; + pref_bias_copy_load = 256; + pref_bias_copy_store = 128; + pref_src_mode = Pref_LoadStreamed; + pref_dst_mode = Pref_PrepareForStore; + break; + } + } else { + if (cpu_has_cache_cdex_s) + cache_line_size = cpu_scache_line_size(); + else if (cpu_has_cache_cdex_p) + cache_line_size = cpu_dcache_line_size(); + } + /* + * Too much unrolling will overflow the available space in + * clear_space_array / copy_page_array. 8 words sounds generous, + * but a R4000 with 128 byte L2 line length can exceed even that. + */ + half_clear_loop_size = min(8 * clear_word_size, + max(cache_line_size >> 1, + 4 * clear_word_size)); + half_copy_loop_size = min(8 * copy_word_size, + max(cache_line_size >> 1, + 4 * copy_word_size)); +} + +static void __cpuinit build_clear_store(u32 **buf, int off) +{ + if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) { + uasm_i_sd(buf, ZERO, off, A0); + } else { + uasm_i_sw(buf, ZERO, off, A0); + } +} + +static inline void __cpuinit build_clear_pref(u32 **buf, int off) +{ + if (off & cache_line_mask()) + return; + + if (pref_bias_clear_store) { + uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off, + A0); + } else if (cpu_has_cache_cdex_s) { + uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0); + } else if (cpu_has_cache_cdex_p) { + if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) { + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + } + + if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) + uasm_i_lw(buf, ZERO, ZERO, AT); + + uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0); + } +} + +void __cpuinit build_clear_page(void) +{ + int off; + u32 *buf = (u32 *)&clear_page_array; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + int i; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + set_prefetch_parameters(); + + /* + * This algorithm makes the following assumptions: + * - The prefetch bias is a multiple of 2 words. + * - The prefetch bias is less than one page. + */ + BUG_ON(pref_bias_clear_store % (2 * clear_word_size)); + BUG_ON(PAGE_SIZE < pref_bias_clear_store); + + off = PAGE_SIZE - pref_bias_clear_store; + if (off > 0xffff || !pref_bias_clear_store) + pg_addiu(&buf, A2, A0, off); + else + uasm_i_ori(&buf, A2, A0, off); + + if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) + uasm_i_lui(&buf, AT, 0xa000); + + off = min(8, pref_bias_clear_store / cache_line_size) * + cache_line_size; + while (off) { + build_clear_pref(&buf, -off); + off -= cache_line_size; + } + uasm_l_clear_pref(&l, buf); + do { + build_clear_pref(&buf, off); + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < half_clear_loop_size); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_clear_pref(&buf, off); + if (off == -clear_word_size) + uasm_il_bne(&buf, &r, A0, A2, label_clear_pref); + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < 0); + + if (pref_bias_clear_store) { + pg_addiu(&buf, A2, A0, pref_bias_clear_store); + uasm_l_clear_nopref(&l, buf); + off = 0; + do { + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < half_clear_loop_size); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + if (off == -clear_word_size) + uasm_il_bne(&buf, &r, A0, A2, + label_clear_nopref); + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < 0); + } + + uasm_i_jr(&buf, RA); + uasm_i_nop(&buf); + + BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array)); + + uasm_resolve_relocs(relocs, labels); + + pr_debug("Synthesized clear page handler (%u instructions).\n", + (u32)(buf - clear_page_array)); + + pr_debug("\t.set push\n"); + pr_debug("\t.set noreorder\n"); + for (i = 0; i < (buf - clear_page_array); i++) + pr_debug("\t.word 0x%08x\n", clear_page_array[i]); + pr_debug("\t.set pop\n"); +} + +static void __cpuinit build_copy_load(u32 **buf, int reg, int off) +{ + if (cpu_has_64bit_gp_regs) { + uasm_i_ld(buf, reg, off, A1); + } else { + uasm_i_lw(buf, reg, off, A1); + } +} + +static void __cpuinit build_copy_store(u32 **buf, int reg, int off) +{ + if (cpu_has_64bit_gp_regs) { + uasm_i_sd(buf, reg, off, A0); + } else { + uasm_i_sw(buf, reg, off, A0); + } +} + +static inline void build_copy_load_pref(u32 **buf, int off) +{ + if (off & cache_line_mask()) + return; + + if (pref_bias_copy_load) + uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1); +} + +static inline void build_copy_store_pref(u32 **buf, int off) +{ + if (off & cache_line_mask()) + return; + + if (pref_bias_copy_store) { + uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off, + A0); + } else if (cpu_has_cache_cdex_s) { + uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0); + } else if (cpu_has_cache_cdex_p) { + if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) { + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + } + + if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) + uasm_i_lw(buf, ZERO, ZERO, AT); + + uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0); + } +} + +void __cpuinit build_copy_page(void) +{ + int off; + u32 *buf = (u32 *)©_page_array; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + int i; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + set_prefetch_parameters(); + + /* + * This algorithm makes the following assumptions: + * - All prefetch biases are multiples of 8 words. + * - The prefetch biases are less than one page. + * - The store prefetch bias isn't greater than the load + * prefetch bias. + */ + BUG_ON(pref_bias_copy_load % (8 * copy_word_size)); + BUG_ON(pref_bias_copy_store % (8 * copy_word_size)); + BUG_ON(PAGE_SIZE < pref_bias_copy_load); + BUG_ON(pref_bias_copy_store > pref_bias_copy_load); + + off = PAGE_SIZE - pref_bias_copy_load; + if (off > 0xffff || !pref_bias_copy_load) + pg_addiu(&buf, A2, A0, off); + else + uasm_i_ori(&buf, A2, A0, off); + + if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) + uasm_i_lui(&buf, AT, 0xa000); + + off = min(8, pref_bias_copy_load / cache_line_size) * cache_line_size; + while (off) { + build_copy_load_pref(&buf, -off); + off -= cache_line_size; + } + off = min(8, pref_bias_copy_store / cache_line_size) * cache_line_size; + while (off) { + build_copy_store_pref(&buf, -off); + off -= cache_line_size; + } + uasm_l_copy_pref_both(&l, buf); + do { + build_copy_load_pref(&buf, off); + build_copy_load(&buf, T0, off); + build_copy_load_pref(&buf, off + copy_word_size); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load_pref(&buf, off + 2 * copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load_pref(&buf, off + 3 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < half_copy_loop_size); + pg_addiu(&buf, A1, A1, 2 * off); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_copy_load_pref(&buf, off); + build_copy_load(&buf, T0, off); + build_copy_load_pref(&buf, off + copy_word_size); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load_pref(&buf, off + 2 * copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load_pref(&buf, off + 3 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + if (off == -(4 * copy_word_size)) + uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < 0); + + if (pref_bias_copy_load - pref_bias_copy_store) { + pg_addiu(&buf, A2, A0, + pref_bias_copy_load - pref_bias_copy_store); + uasm_l_copy_pref_store(&l, buf); + off = 0; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < half_copy_loop_size); + pg_addiu(&buf, A1, A1, 2 * off); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + if (off == -(4 * copy_word_size)) + uasm_il_bne(&buf, &r, A2, A0, + label_copy_pref_store); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < 0); + } + + if (pref_bias_copy_store) { + pg_addiu(&buf, A2, A0, pref_bias_copy_store); + uasm_l_copy_nopref(&l, buf); + off = 0; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store(&buf, T0, off); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < half_copy_loop_size); + pg_addiu(&buf, A1, A1, 2 * off); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store(&buf, T0, off); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + if (off == -(4 * copy_word_size)) + uasm_il_bne(&buf, &r, A2, A0, + label_copy_nopref); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < 0); + } + + uasm_i_jr(&buf, RA); + uasm_i_nop(&buf); + + BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array)); + + uasm_resolve_relocs(relocs, labels); + + pr_debug("Synthesized copy page handler (%u instructions).\n", + (u32)(buf - copy_page_array)); + + pr_debug("\t.set push\n"); + pr_debug("\t.set noreorder\n"); + for (i = 0; i < (buf - copy_page_array); i++) + pr_debug("\t.word 0x%08x\n", copy_page_array[i]); + pr_debug("\t.set pop\n"); +} + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS + +/* + * Pad descriptors to cacheline, since each is exclusively owned by a + * particular CPU. + */ +struct dmadscr { + u64 dscr_a; + u64 dscr_b; + u64 pad_a; + u64 pad_b; +} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS]; + +void sb1_dma_init(void) +{ + int i; + + for (i = 0; i < DM_NUM_CHANNELS; i++) { + const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) | + V_DM_DSCR_BASE_RINGSZ(1); + void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE)); + + __raw_writeq(base_val, base_reg); + __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg); + __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg); + } +} + +void clear_page(void *page) +{ + u64 to_phys = CPHYSADDR((unsigned long)page); + unsigned int cpu = smp_processor_id(); + + /* if the page is not in KSEG0, use old way */ + if ((long)KSEGX((unsigned long)page) != (long)CKSEG0) + return clear_page_cpu(page); + + page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM | + M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; + page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); + __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); + + /* + * Don't really want to do it this way, but there's no + * reliable way to delay completion detection. + */ + while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) + & M_DM_DSCR_BASE_INTERRUPT)) + ; + __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); +} + +void copy_page(void *to, void *from) +{ + u64 from_phys = CPHYSADDR((unsigned long)from); + u64 to_phys = CPHYSADDR((unsigned long)to); + unsigned int cpu = smp_processor_id(); + + /* if any page is not in KSEG0, use old way */ + if ((long)KSEGX((unsigned long)to) != (long)CKSEG0 + || (long)KSEGX((unsigned long)from) != (long)CKSEG0) + return copy_page_cpu(to, from); + + page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST | + M_DM_DSCRA_INTERRUPT; + page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); + __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); + + /* + * Don't really want to do it this way, but there's no + * reliable way to delay completion detection. + */ + while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) + & M_DM_DSCR_BASE_INTERRUPT)) + ; + __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); +} + +#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */ diff --git a/arch/mips/mm/pg-r4k.c b/arch/mips/mm/pg-r4k.c deleted file mode 100644 index 455dedb5b39..00000000000 --- a/arch/mips/mm/pg-r4k.c +++ /dev/null @@ -1,534 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org) - * Copyright (C) 2007 Maciej W. Rozycki - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/proc_fs.h> - -#include <asm/bugs.h> -#include <asm/cacheops.h> -#include <asm/inst.h> -#include <asm/io.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/prefetch.h> -#include <asm/system.h> -#include <asm/bootinfo.h> -#include <asm/mipsregs.h> -#include <asm/mmu_context.h> -#include <asm/cpu.h> -#include <asm/war.h> - -#define half_scache_line_size() (cpu_scache_line_size() >> 1) -#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010) -#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020) - - -/* - * Maximum sizes: - * - * R4000 128 bytes S-cache: 0x58 bytes - * R4600 v1.7: 0x5c bytes - * R4600 v2.0: 0x60 bytes - * With prefetching, 16 byte strides 0xa0 bytes - */ - -static unsigned int clear_page_array[0x130 / 4]; - -void clear_page(void * page) __attribute__((alias("clear_page_array"))); - -EXPORT_SYMBOL(clear_page); - -/* - * Maximum sizes: - * - * R4000 128 bytes S-cache: 0x11c bytes - * R4600 v1.7: 0x080 bytes - * R4600 v2.0: 0x07c bytes - * With prefetching, 16 byte strides 0x0b8 bytes - */ -static unsigned int copy_page_array[0x148 / 4]; - -void copy_page(void *to, void *from) __attribute__((alias("copy_page_array"))); - -EXPORT_SYMBOL(copy_page); - -/* - * This is suboptimal for 32-bit kernels; we assume that R10000 is only used - * with 64-bit kernels. The prefetch offsets have been experimentally tuned - * an Origin 200. - */ -static int pref_offset_clear __cpuinitdata = 512; -static int pref_offset_copy __cpuinitdata = 256; - -static unsigned int pref_src_mode __cpuinitdata; -static unsigned int pref_dst_mode __cpuinitdata; - -static int load_offset __cpuinitdata; -static int store_offset __cpuinitdata; - -static unsigned int __cpuinitdata *dest, *epc; - -static unsigned int instruction_pending; -static union mips_instruction delayed_mi; - -static void __cpuinit emit_instruction(union mips_instruction mi) -{ - if (instruction_pending) - *epc++ = delayed_mi.word; - - instruction_pending = 1; - delayed_mi = mi; -} - -static inline void flush_delay_slot_or_nop(void) -{ - if (instruction_pending) { - *epc++ = delayed_mi.word; - instruction_pending = 0; - return; - } - - *epc++ = 0; -} - -static inline unsigned int *label(void) -{ - if (instruction_pending) { - *epc++ = delayed_mi.word; - instruction_pending = 0; - } - - return epc; -} - -static inline void build_insn_word(unsigned int word) -{ - union mips_instruction mi; - - mi.word = word; - - emit_instruction(mi); -} - -static inline void build_nop(void) -{ - build_insn_word(0); /* nop */ -} - -static inline void build_src_pref(int advance) -{ - if (!(load_offset & (cpu_dcache_line_size() - 1)) && advance) { - union mips_instruction mi; - - mi.i_format.opcode = pref_op; - mi.i_format.rs = 5; /* $a1 */ - mi.i_format.rt = pref_src_mode; - mi.i_format.simmediate = load_offset + advance; - - emit_instruction(mi); - } -} - -static inline void __build_load_reg(int reg) -{ - union mips_instruction mi; - unsigned int width; - - if (cpu_has_64bit_gp_regs) { - mi.i_format.opcode = ld_op; - width = 8; - } else { - mi.i_format.opcode = lw_op; - width = 4; - } - mi.i_format.rs = 5; /* $a1 */ - mi.i_format.rt = reg; /* $reg */ - mi.i_format.simmediate = load_offset; - - load_offset += width; - emit_instruction(mi); -} - -static inline void build_load_reg(int reg) -{ - if (cpu_has_prefetch) - build_src_pref(pref_offset_copy); - - __build_load_reg(reg); -} - -static inline void build_dst_pref(int advance) -{ - if (!(store_offset & (cpu_dcache_line_size() - 1)) && advance) { - union mips_instruction mi; - - mi.i_format.opcode = pref_op; - mi.i_format.rs = 4; /* $a0 */ - mi.i_format.rt = pref_dst_mode; - mi.i_format.simmediate = store_offset + advance; - - emit_instruction(mi); - } -} - -static inline void build_cdex_s(void) -{ - union mips_instruction mi; - - if ((store_offset & (cpu_scache_line_size() - 1))) - return; - - mi.c_format.opcode = cache_op; - mi.c_format.rs = 4; /* $a0 */ - mi.c_format.c_op = 3; /* Create Dirty Exclusive */ - mi.c_format.cache = 3; /* Secondary Data Cache */ - mi.c_format.simmediate = store_offset; - - emit_instruction(mi); -} - -static inline void build_cdex_p(void) -{ - union mips_instruction mi; - - if (store_offset & (cpu_dcache_line_size() - 1)) - return; - - if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) { - build_nop(); - build_nop(); - build_nop(); - build_nop(); - } - - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) - build_insn_word(0x8c200000); /* lw $zero, ($at) */ - - mi.c_format.opcode = cache_op; - mi.c_format.rs = 4; /* $a0 */ - mi.c_format.c_op = 3; /* Create Dirty Exclusive */ - mi.c_format.cache = 1; /* Data Cache */ - mi.c_format.simmediate = store_offset; - - emit_instruction(mi); -} - -static void __cpuinit __build_store_reg(int reg) -{ - union mips_instruction mi; - unsigned int width; - - if (cpu_has_64bit_gp_regs || - (cpu_has_64bit_zero_reg && reg == 0)) { - mi.i_format.opcode = sd_op; - width = 8; - } else { - mi.i_format.opcode = sw_op; - width = 4; - } - mi.i_format.rs = 4; /* $a0 */ - mi.i_format.rt = reg; /* $reg */ - mi.i_format.simmediate = store_offset; - - store_offset += width; - emit_instruction(mi); -} - -static inline void build_store_reg(int reg) -{ - int pref_off = cpu_has_prefetch ? - (reg ? pref_offset_copy : pref_offset_clear) : 0; - if (pref_off) - build_dst_pref(pref_off); - else if (cpu_has_cache_cdex_s) - build_cdex_s(); - else if (cpu_has_cache_cdex_p) - build_cdex_p(); - - __build_store_reg(reg); -} - -static inline void build_addiu_rt_rs(unsigned int rt, unsigned int rs, - unsigned long offset) -{ - union mips_instruction mi; - - BUG_ON(offset > 0x7fff); - - if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) { - mi.i_format.opcode = addiu_op; - mi.i_format.rs = 0; /* $zero */ - mi.i_format.rt = 25; /* $t9 */ - mi.i_format.simmediate = offset; - emit_instruction(mi); - - mi.r_format.opcode = spec_op; - mi.r_format.rs = rs; - mi.r_format.rt = 25; /* $t9 */ - mi.r_format.rd = rt; - mi.r_format.re = 0; - mi.r_format.func = daddu_op; - } else { - mi.i_format.opcode = cpu_has_64bit_gp_regs ? - daddiu_op : addiu_op; - mi.i_format.rs = rs; - mi.i_format.rt = rt; - mi.i_format.simmediate = offset; - } - emit_instruction(mi); -} - -static inline void build_addiu_a2_a0(unsigned long offset) -{ - build_addiu_rt_rs(6, 4, offset); /* $a2, $a0, offset */ -} - -static inline void build_addiu_a2(unsigned long offset) -{ - build_addiu_rt_rs(6, 6, offset); /* $a2, $a2, offset */ -} - -static inline void build_addiu_a1(unsigned long offset) -{ - build_addiu_rt_rs(5, 5, offset); /* $a1, $a1, offset */ - - load_offset -= offset; -} - -static inline void build_addiu_a0(unsigned long offset) -{ - build_addiu_rt_rs(4, 4, offset); /* $a0, $a0, offset */ - - store_offset -= offset; -} - -static inline void build_bne(unsigned int *dest) -{ - union mips_instruction mi; - - mi.i_format.opcode = bne_op; - mi.i_format.rs = 6; /* $a2 */ - mi.i_format.rt = 4; /* $a0 */ - mi.i_format.simmediate = dest - epc - 1; - - *epc++ = mi.word; - flush_delay_slot_or_nop(); -} - -static inline void build_jr_ra(void) -{ - union mips_instruction mi; - - mi.r_format.opcode = spec_op; - mi.r_format.rs = 31; - mi.r_format.rt = 0; - mi.r_format.rd = 0; - mi.r_format.re = 0; - mi.r_format.func = jr_op; - - *epc++ = mi.word; - flush_delay_slot_or_nop(); -} - -void __cpuinit build_clear_page(void) -{ - unsigned int loop_start; - unsigned long off; - int i; - - epc = (unsigned int *) &clear_page_array; - instruction_pending = 0; - store_offset = 0; - - if (cpu_has_prefetch) { - switch (current_cpu_type()) { - case CPU_TX49XX: - /* TX49 supports only Pref_Load */ - pref_offset_clear = 0; - pref_offset_copy = 0; - break; - - case CPU_RM9000: - /* - * As a workaround for erratum G105 which make the - * PrepareForStore hint unusable we fall back to - * StoreRetained on the RM9000. Once it is known which - * versions of the RM9000 we'll be able to condition- - * alize this. - */ - - case CPU_R10000: - case CPU_R12000: - case CPU_R14000: - pref_src_mode = Pref_LoadStreamed; - pref_dst_mode = Pref_StoreStreamed; - break; - - default: - pref_src_mode = Pref_LoadStreamed; - pref_dst_mode = Pref_PrepareForStore; - break; - } - } - - off = PAGE_SIZE - (cpu_has_prefetch ? pref_offset_clear : 0); - if (off > 0x7fff) { - build_addiu_a2_a0(off >> 1); - build_addiu_a2(off >> 1); - } else - build_addiu_a2_a0(off); - - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) - build_insn_word(0x3c01a000); /* lui $at, 0xa000 */ - -dest = label(); - do { - build_store_reg(0); - build_store_reg(0); - build_store_reg(0); - build_store_reg(0); - } while (store_offset < half_scache_line_size()); - build_addiu_a0(2 * store_offset); - loop_start = store_offset; - do { - build_store_reg(0); - build_store_reg(0); - build_store_reg(0); - build_store_reg(0); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_bne(dest); - - if (cpu_has_prefetch && pref_offset_clear) { - build_addiu_a2_a0(pref_offset_clear); - dest = label(); - loop_start = store_offset; - do { - __build_store_reg(0); - __build_store_reg(0); - __build_store_reg(0); - __build_store_reg(0); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_addiu_a0(2 * store_offset); - loop_start = store_offset; - do { - __build_store_reg(0); - __build_store_reg(0); - __build_store_reg(0); - __build_store_reg(0); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_bne(dest); - } - - build_jr_ra(); - - BUG_ON(epc > clear_page_array + ARRAY_SIZE(clear_page_array)); - - pr_info("Synthesized clear page handler (%u instructions).\n", - (unsigned int)(epc - clear_page_array)); - - pr_debug("\t.set push\n"); - pr_debug("\t.set noreorder\n"); - for (i = 0; i < (epc - clear_page_array); i++) - pr_debug("\t.word 0x%08x\n", clear_page_array[i]); - pr_debug("\t.set pop\n"); -} - -void __cpuinit build_copy_page(void) -{ - unsigned int loop_start; - unsigned long off; - int i; - - epc = (unsigned int *) ©_page_array; - store_offset = load_offset = 0; - instruction_pending = 0; - - off = PAGE_SIZE - (cpu_has_prefetch ? pref_offset_copy : 0); - if (off > 0x7fff) { - build_addiu_a2_a0(off >> 1); - build_addiu_a2(off >> 1); - } else - build_addiu_a2_a0(off); - - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) - build_insn_word(0x3c01a000); /* lui $at, 0xa000 */ - -dest = label(); - loop_start = store_offset; - do { - build_load_reg( 8); - build_load_reg( 9); - build_load_reg(10); - build_load_reg(11); - build_store_reg( 8); - build_store_reg( 9); - build_store_reg(10); - build_store_reg(11); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_addiu_a0(2 * store_offset); - build_addiu_a1(2 * load_offset); - loop_start = store_offset; - do { - build_load_reg( 8); - build_load_reg( 9); - build_load_reg(10); - build_load_reg(11); - build_store_reg( 8); - build_store_reg( 9); - build_store_reg(10); - build_store_reg(11); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_bne(dest); - - if (cpu_has_prefetch && pref_offset_copy) { - build_addiu_a2_a0(pref_offset_copy); - dest = label(); - loop_start = store_offset; - do { - __build_load_reg( 8); - __build_load_reg( 9); - __build_load_reg(10); - __build_load_reg(11); - __build_store_reg( 8); - __build_store_reg( 9); - __build_store_reg(10); - __build_store_reg(11); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_addiu_a0(2 * store_offset); - build_addiu_a1(2 * load_offset); - loop_start = store_offset; - do { - __build_load_reg( 8); - __build_load_reg( 9); - __build_load_reg(10); - __build_load_reg(11); - __build_store_reg( 8); - __build_store_reg( 9); - __build_store_reg(10); - __build_store_reg(11); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_bne(dest); - } - - build_jr_ra(); - - BUG_ON(epc > copy_page_array + ARRAY_SIZE(copy_page_array)); - - pr_info("Synthesized copy page handler (%u instructions).\n", - (unsigned int)(epc - copy_page_array)); - - pr_debug("\t.set push\n"); - pr_debug("\t.set noreorder\n"); - for (i = 0; i < (epc - copy_page_array); i++) - pr_debug("\t.word 0x%08x\n", copy_page_array[i]); - pr_debug("\t.set pop\n"); -} diff --git a/arch/mips/mm/pg-sb1.c b/arch/mips/mm/pg-sb1.c deleted file mode 100644 index 49e289d0541..00000000000 --- a/arch/mips/mm/pg-sb1.c +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) - * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org) - * Copyright (C) 2000 SiByte, Inc. - * Copyright (C) 2005 Thiemo Seufer - * - * Written by Justin Carlson of SiByte, Inc. - * and Kip Walker of Broadcom Corp. - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/smp.h> - -#include <asm/io.h> -#include <asm/sibyte/sb1250.h> -#include <asm/sibyte/sb1250_regs.h> -#include <asm/sibyte/sb1250_dma.h> - -#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS -#define SB1_PREF_LOAD_STREAMED_HINT "0" -#define SB1_PREF_STORE_STREAMED_HINT "1" -#else -#define SB1_PREF_LOAD_STREAMED_HINT "4" -#define SB1_PREF_STORE_STREAMED_HINT "5" -#endif - -static inline void clear_page_cpu(void *page) -{ - unsigned char *addr = (unsigned char *) page; - unsigned char *end = addr + PAGE_SIZE; - - /* - * JDCXXX - This should be bottlenecked by the write buffer, but these - * things tend to be mildly unpredictable...should check this on the - * performance model - * - * We prefetch 4 lines ahead. We're also "cheating" slightly here... - * since we know we're on an SB1, we force the assembler to take - * 64-bit operands to speed things up - */ - __asm__ __volatile__( - " .set push \n" - " .set mips4 \n" - " .set noreorder \n" -#ifdef CONFIG_CPU_HAS_PREFETCH - " daddiu %0, %0, 128 \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%0) \n" - /* Prefetch the first 4 lines */ - " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n" - "1: sd $0, -128(%0) \n" /* Throw out a cacheline of 0's */ - " sd $0, -120(%0) \n" - " sd $0, -112(%0) \n" - " sd $0, -104(%0) \n" - " daddiu %0, %0, 32 \n" - " bnel %0, %1, 1b \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n" - " daddiu %0, %0, -128 \n" -#endif - " sd $0, 0(%0) \n" /* Throw out a cacheline of 0's */ - "1: sd $0, 8(%0) \n" - " sd $0, 16(%0) \n" - " sd $0, 24(%0) \n" - " daddiu %0, %0, 32 \n" - " bnel %0, %1, 1b \n" - " sd $0, 0(%0) \n" - " .set pop \n" - : "+r" (addr) - : "r" (end) - : "memory"); -} - -static inline void copy_page_cpu(void *to, void *from) -{ - unsigned char *src = (unsigned char *)from; - unsigned char *dst = (unsigned char *)to; - unsigned char *end = src + PAGE_SIZE; - - /* - * The pref's used here are using "streaming" hints, which cause the - * copied data to be kicked out of the cache sooner. A page copy often - * ends up copying a lot more data than is commonly used, so this seems - * to make sense in terms of reducing cache pollution, but I've no real - * performance data to back this up - */ - __asm__ __volatile__( - " .set push \n" - " .set mips4 \n" - " .set noreorder \n" -#ifdef CONFIG_CPU_HAS_PREFETCH - " daddiu %0, %0, 128 \n" - " daddiu %1, %1, 128 \n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", -128(%0)\n" - /* Prefetch the first 4 lines */ - " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", -96(%0)\n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%1)\n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", -64(%0)\n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%1)\n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n" - "1: pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%1)\n" -# ifdef CONFIG_64BIT - " ld $8, -128(%0) \n" /* Block copy a cacheline */ - " ld $9, -120(%0) \n" - " ld $10, -112(%0) \n" - " ld $11, -104(%0) \n" - " sd $8, -128(%1) \n" - " sd $9, -120(%1) \n" - " sd $10, -112(%1) \n" - " sd $11, -104(%1) \n" -# else - " lw $2, -128(%0) \n" /* Block copy a cacheline */ - " lw $3, -124(%0) \n" - " lw $6, -120(%0) \n" - " lw $7, -116(%0) \n" - " lw $8, -112(%0) \n" - " lw $9, -108(%0) \n" - " lw $10, -104(%0) \n" - " lw $11, -100(%0) \n" - " sw $2, -128(%1) \n" - " sw $3, -124(%1) \n" - " sw $6, -120(%1) \n" - " sw $7, -116(%1) \n" - " sw $8, -112(%1) \n" - " sw $9, -108(%1) \n" - " sw $10, -104(%1) \n" - " sw $11, -100(%1) \n" -# endif - " daddiu %0, %0, 32 \n" - " daddiu %1, %1, 32 \n" - " bnel %0, %2, 1b \n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n" - " daddiu %0, %0, -128 \n" - " daddiu %1, %1, -128 \n" -#endif -#ifdef CONFIG_64BIT - " ld $8, 0(%0) \n" /* Block copy a cacheline */ - "1: ld $9, 8(%0) \n" - " ld $10, 16(%0) \n" - " ld $11, 24(%0) \n" - " sd $8, 0(%1) \n" - " sd $9, 8(%1) \n" - " sd $10, 16(%1) \n" - " sd $11, 24(%1) \n" -#else - " lw $2, 0(%0) \n" /* Block copy a cacheline */ - "1: lw $3, 4(%0) \n" - " lw $6, 8(%0) \n" - " lw $7, 12(%0) \n" - " lw $8, 16(%0) \n" - " lw $9, 20(%0) \n" - " lw $10, 24(%0) \n" - " lw $11, 28(%0) \n" - " sw $2, 0(%1) \n" - " sw $3, 4(%1) \n" - " sw $6, 8(%1) \n" - " sw $7, 12(%1) \n" - " sw $8, 16(%1) \n" - " sw $9, 20(%1) \n" - " sw $10, 24(%1) \n" - " sw $11, 28(%1) \n" -#endif - " daddiu %0, %0, 32 \n" - " daddiu %1, %1, 32 \n" - " bnel %0, %2, 1b \n" -#ifdef CONFIG_64BIT - " ld $8, 0(%0) \n" -#else - " lw $2, 0(%0) \n" -#endif - " .set pop \n" - : "+r" (src), "+r" (dst) - : "r" (end) -#ifdef CONFIG_64BIT - : "$8", "$9", "$10", "$11", "memory"); -#else - : "$2", "$3", "$6", "$7", "$8", "$9", "$10", "$11", "memory"); -#endif -} - - -#ifdef CONFIG_SIBYTE_DMA_PAGEOPS - -/* - * Pad descriptors to cacheline, since each is exclusively owned by a - * particular CPU. - */ -typedef struct dmadscr_s { - u64 dscr_a; - u64 dscr_b; - u64 pad_a; - u64 pad_b; -} dmadscr_t; - -static dmadscr_t page_descr[DM_NUM_CHANNELS] - __attribute__((aligned(SMP_CACHE_BYTES))); - -void sb1_dma_init(void) -{ - int i; - - for (i = 0; i < DM_NUM_CHANNELS; i++) { - const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) | - V_DM_DSCR_BASE_RINGSZ(1); - void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE)); - - __raw_writeq(base_val, base_reg); - __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg); - __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg); - } -} - -void clear_page(void *page) -{ - u64 to_phys = CPHYSADDR((unsigned long)page); - unsigned int cpu = smp_processor_id(); - - /* if the page is not in KSEG0, use old way */ - if ((long)KSEGX((unsigned long)page) != (long)CKSEG0) - return clear_page_cpu(page); - - page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM | - M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; - page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); - __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); - - /* - * Don't really want to do it this way, but there's no - * reliable way to delay completion detection. - */ - while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) - & M_DM_DSCR_BASE_INTERRUPT)) - ; - __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); -} - -void copy_page(void *to, void *from) -{ - u64 from_phys = CPHYSADDR((unsigned long)from); - u64 to_phys = CPHYSADDR((unsigned long)to); - unsigned int cpu = smp_processor_id(); - - /* if any page is not in KSEG0, use old way */ - if ((long)KSEGX((unsigned long)to) != (long)CKSEG0 - || (long)KSEGX((unsigned long)from) != (long)CKSEG0) - return copy_page_cpu(to, from); - - page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST | - M_DM_DSCRA_INTERRUPT; - page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); - __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); - - /* - * Don't really want to do it this way, but there's no - * reliable way to delay completion detection. - */ - while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) - & M_DM_DSCR_BASE_INTERRUPT)) - ; - __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); -} - -#else /* !CONFIG_SIBYTE_DMA_PAGEOPS */ - -void clear_page(void *page) -{ - return clear_page_cpu(page); -} - -void copy_page(void *to, void *from) -{ - return copy_page_cpu(to, from); -} - -#endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */ - -EXPORT_SYMBOL(clear_page); -EXPORT_SYMBOL(copy_page); - -void __cpuinit build_clear_page(void) -{ -} - -void __cpuinit build_copy_page(void) -{ -} diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c index 57df1c38e30..7dfa579ab24 100644 --- a/arch/mips/mm/pgtable.c +++ b/arch/mips/mm/pgtable.c @@ -12,7 +12,6 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); pfn = max_mapnr; while (pfn-- > 0) { if (!pfn_valid(pfn)) diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 63065d6e806..5ce2fa74562 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -299,7 +299,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) idx = read_c0_index(); ptep = pte_offset_map(pmdp, address); -#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1) +#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) write_c0_entrylo0(ptep->pte_high); ptep++; write_c0_entrylo1(ptep->pte_high); diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c index 1a6f7704cc8..1655aa69e13 100644 --- a/arch/mips/mm/uasm.c +++ b/arch/mips/mm/uasm.c @@ -58,13 +58,13 @@ enum opcode { insn_invalid, insn_addu, insn_addiu, insn_and, insn_andi, insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, - insn_bne, insn_daddu, insn_daddiu, insn_dmfc0, insn_dmtc0, - insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, - insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld, - insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0, insn_mtc0, - insn_ori, insn_rfe, insn_sc, insn_scd, insn_sd, insn_sll, - insn_sra, insn_srl, insn_subu, insn_sw, insn_tlbp, insn_tlbwi, - insn_tlbwr, insn_xor, insn_xori + insn_bne, insn_cache, insn_daddu, insn_daddiu, insn_dmfc0, + insn_dmtc0, insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, + insn_dsrl32, insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, + insn_ld, insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0, + insn_mtc0, insn_ori, insn_pref, insn_rfe, insn_sc, insn_scd, + insn_sd, insn_sll, insn_sra, insn_srl, insn_subu, insn_sw, + insn_tlbp, insn_tlbwi, insn_tlbwr, insn_xor, insn_xori }; struct insn { @@ -94,6 +94,7 @@ static struct insn insn_table[] __cpuinitdata = { { insn_bltz, M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM }, { insn_bltzl, M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM }, { insn_bne, M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, + { insn_cache, M(cache_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD }, { insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET}, @@ -116,6 +117,7 @@ static struct insn insn_table[] __cpuinitdata = { { insn_mfc0, M(cop0_op, mfc_op, 0, 0, 0, 0), RT | RD | SET}, { insn_mtc0, M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET}, { insn_ori, M(ori_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, + { insn_pref, M(pref_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_rfe, M(cop0_op, cop_op, 0, 0, 0, rfe_op), 0 }, { insn_sc, M(sc_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_scd, M(scd_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, @@ -337,6 +339,7 @@ I_u1s2(_bgezl) I_u1s2(_bltz) I_u1s2(_bltzl) I_u1u2s3(_bne) +I_u2s3u1(_cache) I_u1u2u3(_dmfc0) I_u1u2u3(_dmtc0) I_u2u1s3(_daddiu) @@ -359,6 +362,7 @@ I_u2s3u1(_lw) I_u1u2u3(_mfc0) I_u1u2u3(_mtc0) I_u2u1u3(_ori) +I_u2s3u1(_pref) I_0(_rfe) I_u2s3u1(_sc) I_u2s3u1(_scd) @@ -555,6 +559,14 @@ uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) } void __cpuinit +uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1, + unsigned int reg2, int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_bne(p, reg1, reg2, 0); +} + +void __cpuinit uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) { uasm_r_mips_pc16(r, *p, lid); diff --git a/arch/mips/mm/uasm.h b/arch/mips/mm/uasm.h index fe0574f6e77..0d6a66f3203 100644 --- a/arch/mips/mm/uasm.h +++ b/arch/mips/mm/uasm.h @@ -55,6 +55,7 @@ Ip_u1s2(_bgezl); Ip_u1s2(_bltz); Ip_u1s2(_bltzl); Ip_u1u2s3(_bne); +Ip_u2s3u1(_cache); Ip_u1u2u3(_dmfc0); Ip_u1u2u3(_dmtc0); Ip_u2u1s3(_daddiu); @@ -77,6 +78,7 @@ Ip_u2s3u1(_lw); Ip_u1u2u3(_mfc0); Ip_u1u2u3(_mtc0); Ip_u2u1u3(_ori); +Ip_u2s3u1(_pref); Ip_0(_rfe); Ip_u2s3u1(_sc); Ip_u2s3u1(_scd); @@ -177,6 +179,8 @@ void uasm_il_bltz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid); void uasm_il_b(u32 **p, struct uasm_reloc **r, int lid); void uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid); void uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid); +void uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1, + unsigned int reg2, int lid); void uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid); void uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid); void uasm_il_bgez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid); diff --git a/arch/mips/philips/pnx8550/common/Makefile b/arch/mips/nxp/pnx8550/common/Makefile index 31cc1a5cec3..31cc1a5cec3 100644 --- a/arch/mips/philips/pnx8550/common/Makefile +++ b/arch/mips/nxp/pnx8550/common/Makefile diff --git a/arch/mips/philips/pnx8550/common/gdb_hook.c b/arch/mips/nxp/pnx8550/common/gdb_hook.c index ad4624f6d9b..ad4624f6d9b 100644 --- a/arch/mips/philips/pnx8550/common/gdb_hook.c +++ b/arch/mips/nxp/pnx8550/common/gdb_hook.c diff --git a/arch/mips/philips/pnx8550/common/int.c b/arch/mips/nxp/pnx8550/common/int.c index aad03429a5e..aad03429a5e 100644 --- a/arch/mips/philips/pnx8550/common/int.c +++ b/arch/mips/nxp/pnx8550/common/int.c diff --git a/arch/mips/philips/pnx8550/common/pci.c b/arch/mips/nxp/pnx8550/common/pci.c index eee4f3dfc41..eee4f3dfc41 100644 --- a/arch/mips/philips/pnx8550/common/pci.c +++ b/arch/mips/nxp/pnx8550/common/pci.c diff --git a/arch/mips/philips/pnx8550/common/platform.c b/arch/mips/nxp/pnx8550/common/platform.c index c839436bd01..c7c763dbe58 100644 --- a/arch/mips/philips/pnx8550/common/platform.c +++ b/arch/mips/nxp/pnx8550/common/platform.c @@ -1,5 +1,5 @@ /* - * Platform device support for Philips PNX8550 SoCs + * Platform device support for NXP PNX8550 SoCs * * Copyright 2005, Embedded Alley Solutions, Inc * diff --git a/arch/mips/philips/pnx8550/common/proc.c b/arch/mips/nxp/pnx8550/common/proc.c index 18b125e3b65..18b125e3b65 100644 --- a/arch/mips/philips/pnx8550/common/proc.c +++ b/arch/mips/nxp/pnx8550/common/proc.c diff --git a/arch/mips/philips/pnx8550/common/prom.c b/arch/mips/nxp/pnx8550/common/prom.c index 2f567452e7a..2f567452e7a 100644 --- a/arch/mips/philips/pnx8550/common/prom.c +++ b/arch/mips/nxp/pnx8550/common/prom.c diff --git a/arch/mips/philips/pnx8550/common/reset.c b/arch/mips/nxp/pnx8550/common/reset.c index 7b2cbc5b2c7..7b2cbc5b2c7 100644 --- a/arch/mips/philips/pnx8550/common/reset.c +++ b/arch/mips/nxp/pnx8550/common/reset.c diff --git a/arch/mips/philips/pnx8550/common/setup.c b/arch/mips/nxp/pnx8550/common/setup.c index 92d764c9770..92d764c9770 100644 --- a/arch/mips/philips/pnx8550/common/setup.c +++ b/arch/mips/nxp/pnx8550/common/setup.c diff --git a/arch/mips/philips/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c index 62f495b57f9..62f495b57f9 100644 --- a/arch/mips/philips/pnx8550/common/time.c +++ b/arch/mips/nxp/pnx8550/common/time.c diff --git a/arch/mips/philips/pnx8550/jbs/Makefile b/arch/mips/nxp/pnx8550/jbs/Makefile index e8228dbca8f..ad6a8ca7d8c 100644 --- a/arch/mips/philips/pnx8550/jbs/Makefile +++ b/arch/mips/nxp/pnx8550/jbs/Makefile @@ -1,4 +1,4 @@ -# Makefile for the Philips JBS Board. +# Makefile for the NXP JBS Board. lib-y := init.o board_setup.o irqmap.o diff --git a/arch/mips/philips/pnx8550/jbs/board_setup.c b/arch/mips/nxp/pnx8550/jbs/board_setup.c index f92826e0096..f92826e0096 100644 --- a/arch/mips/philips/pnx8550/jbs/board_setup.c +++ b/arch/mips/nxp/pnx8550/jbs/board_setup.c diff --git a/arch/mips/philips/pnx8550/jbs/init.c b/arch/mips/nxp/pnx8550/jbs/init.c index 90b4d35f3ec..d59b4a4e5e8 100644 --- a/arch/mips/philips/pnx8550/jbs/init.c +++ b/arch/mips/nxp/pnx8550/jbs/init.c @@ -40,7 +40,7 @@ extern char *prom_getenv(char *envname); const char *get_system_type(void) { - return "Philips PNX8550/JBS"; + return "NXP PNX8550/JBS"; } void __init prom_init(void) diff --git a/arch/mips/philips/pnx8550/jbs/irqmap.c b/arch/mips/nxp/pnx8550/jbs/irqmap.c index 98c3429e6e5..7fc89842002 100644 --- a/arch/mips/philips/pnx8550/jbs/irqmap.c +++ b/arch/mips/nxp/pnx8550/jbs/irqmap.c @@ -1,5 +1,5 @@ /* - * Philips JBS board irqmap. + * NXP JBS board irqmap. * * Copyright 2005 Embedded Alley Solutions, Inc * source@embeddealley.com @@ -33,4 +33,3 @@ char pnx8550_irq_tab[][5] __initdata = { [9] = { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff}, [17] = { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff}, }; - diff --git a/arch/mips/philips/pnx8550/stb810/Makefile b/arch/mips/nxp/pnx8550/stb810/Makefile index f14b592af39..ab91d72c566 100644 --- a/arch/mips/philips/pnx8550/stb810/Makefile +++ b/arch/mips/nxp/pnx8550/stb810/Makefile @@ -1,4 +1,4 @@ -# Makefile for the Philips STB810 Board. +# Makefile for the NXP STB810 Board. lib-y := prom_init.o board_setup.o irqmap.o diff --git a/arch/mips/philips/pnx8550/stb810/board_setup.c b/arch/mips/nxp/pnx8550/stb810/board_setup.c index 345d71e53cf..1282c27cfcb 100644 --- a/arch/mips/philips/pnx8550/stb810/board_setup.c +++ b/arch/mips/nxp/pnx8550/stb810/board_setup.c @@ -1,7 +1,7 @@ /* * STB810 specific board startup routines. * - * Based on the arch/mips/philips/pnx8550/jbs/board_setup.c + * Based on the arch/mips/nxp/pnx8550/jbs/board_setup.c * * Author: MontaVista Software, Inc. * source@mvista.com diff --git a/arch/mips/philips/pnx8550/stb810/irqmap.c b/arch/mips/nxp/pnx8550/stb810/irqmap.c index 5ee11e19975..8c034963ddc 100644 --- a/arch/mips/philips/pnx8550/stb810/irqmap.c +++ b/arch/mips/nxp/pnx8550/stb810/irqmap.c @@ -1,5 +1,5 @@ /* - * Philips STB810 board irqmap. + * NXP STB810 board irqmap. * * Author: MontaVista Software, Inc. * source@mvista.com @@ -20,4 +20,3 @@ char pnx8550_irq_tab[][5] __initdata = { [9] = { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff}, [10] = { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff}, }; - diff --git a/arch/mips/philips/pnx8550/stb810/prom_init.c b/arch/mips/nxp/pnx8550/stb810/prom_init.c index 832dd60b0a7..ca7f4ada064 100644 --- a/arch/mips/philips/pnx8550/stb810/prom_init.c +++ b/arch/mips/nxp/pnx8550/stb810/prom_init.c @@ -28,7 +28,7 @@ extern char *prom_getenv(char *envname); const char *get_system_type(void) { - return "Philips PNX8550/STB810"; + return "NXP PNX8950/STB810"; } void __init prom_init(void) diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index aa52aa146ce..b5f6f71b27b 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -80,6 +80,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) case CPU_24K: case CPU_25KF: case CPU_34K: + case CPU_1004K: case CPU_74K: case CPU_SB1: case CPU_SB1A: diff --git a/arch/mips/oprofile/op_impl.h b/arch/mips/oprofile/op_impl.h index fa6b4aae752..2bfc17c3010 100644 --- a/arch/mips/oprofile/op_impl.h +++ b/arch/mips/oprofile/op_impl.h @@ -10,7 +10,6 @@ #ifndef OP_IMPL_H #define OP_IMPL_H 1 -extern int null_perf_irq(void); extern int (*perf_irq)(void); /* Per-counter configuration as set via oprofilefs. */ diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c index ccbea229a0e..da8cbb6899d 100644 --- a/arch/mips/oprofile/op_model_mipsxx.c +++ b/arch/mips/oprofile/op_model_mipsxx.c @@ -31,9 +31,14 @@ #define M_COUNTER_OVERFLOW (1UL << 31) +static int (*save_perf_irq)(void); + #ifdef CONFIG_MIPS_MT_SMP -#define WHAT (M_TC_EN_VPE | M_PERFCTL_VPEID(smp_processor_id())) -#define vpe_id() smp_processor_id() +static int cpu_has_mipsmt_pertccounters; +#define WHAT (M_TC_EN_VPE | \ + M_PERFCTL_VPEID(cpu_data[smp_processor_id()].vpe_id)) +#define vpe_id() (cpu_has_mipsmt_pertccounters ? \ + 0 : cpu_data[smp_processor_id()].vpe_id) /* * The number of bits to shift to convert between counters per core and @@ -243,11 +248,11 @@ static inline int __n_counters(void) { if (!(read_c0_config1() & M_CONFIG1_PC)) return 0; - if (!(r_c0_perfctrl0() & M_PERFCTL_MORE)) + if (!(read_c0_perfctrl0() & M_PERFCTL_MORE)) return 1; - if (!(r_c0_perfctrl1() & M_PERFCTL_MORE)) + if (!(read_c0_perfctrl1() & M_PERFCTL_MORE)) return 2; - if (!(r_c0_perfctrl2() & M_PERFCTL_MORE)) + if (!(read_c0_perfctrl2() & M_PERFCTL_MORE)) return 3; return 4; @@ -274,8 +279,9 @@ static inline int n_counters(void) return counters; } -static inline void reset_counters(int counters) +static void reset_counters(void *arg) { + int counters = (int)arg; switch (counters) { case 4: w_c0_perfctrl3(0); @@ -302,9 +308,12 @@ static int __init mipsxx_init(void) return -ENODEV; } - reset_counters(counters); - - counters = counters_total_to_per_cpu(counters); +#ifdef CONFIG_MIPS_MT_SMP + cpu_has_mipsmt_pertccounters = read_c0_config7() & (1<<19); + if (!cpu_has_mipsmt_pertccounters) + counters = counters_total_to_per_cpu(counters); +#endif + on_each_cpu(reset_counters, (void *)counters, 0, 1); op_model_mipsxx_ops.num_counters = counters; switch (current_cpu_type()) { @@ -320,6 +329,13 @@ static int __init mipsxx_init(void) op_model_mipsxx_ops.cpu_type = "mips/25K"; break; + case CPU_1004K: +#if 0 + /* FIXME: report as 34K for now */ + op_model_mipsxx_ops.cpu_type = "mips/1004K"; + break; +#endif + case CPU_34K: op_model_mipsxx_ops.cpu_type = "mips/34K"; break; @@ -355,6 +371,7 @@ static int __init mipsxx_init(void) return -ENODEV; } + save_perf_irq = perf_irq; perf_irq = mipsxx_perfcount_handler; return 0; @@ -365,9 +382,9 @@ static void mipsxx_exit(void) int counters = op_model_mipsxx_ops.num_counters; counters = counters_per_cpu_to_total(counters); - reset_counters(counters); + on_each_cpu(reset_counters, (void *)counters, 0, 1); - perf_irq = null_perf_irq; + perf_irq = save_perf_irq; } struct op_mips_model op_model_mipsxx_ops = { diff --git a/arch/mips/pci/fixup-au1000.c b/arch/mips/pci/fixup-au1000.c index ca0276c8070..00c36c9dbe0 100644 --- a/arch/mips/pci/fixup-au1000.c +++ b/arch/mips/pci/fixup-au1000.c @@ -26,13 +26,10 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/types.h> + #include <linux/pci.h> -#include <linux/kernel.h> #include <linux/init.h> -#include <asm/mach-au1x00/au1000.h> - extern char irq_tab_alchemy[][5]; int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) diff --git a/arch/mips/pci/ops-pnx8550.c b/arch/mips/pci/ops-pnx8550.c index d6106465249..0e160d9f07c 100644 --- a/arch/mips/pci/ops-pnx8550.c +++ b/arch/mips/pci/ops-pnx8550.c @@ -90,14 +90,14 @@ config_access(unsigned int pci_cmd, struct pci_bus *bus, unsigned int devfn, int loops--; if (loops == 0) { - printk("%s : Arbiter Locked.\n", __FUNCTION__); + printk("%s : Arbiter Locked.\n", __func__); } } clear_status(); if ((pci_cmd == PCI_CMD_IOR) || (pci_cmd == PCI_CMD_IOW)) { printk("%s timeout (GPPM_CTRL=%X) ioaddr %lX pci_cmd %X\n", - __FUNCTION__, inl(PCI_BASE | PCI_GPPM_CTRL), ioaddr, + __func__, inl(PCI_BASE | PCI_GPPM_CTRL), ioaddr, pci_cmd); } diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c index 624bbdbff2a..b6cab089561 100644 --- a/arch/mips/sgi-ip32/ip32-reset.c +++ b/arch/mips/sgi-ip32/ip32-reset.c @@ -142,7 +142,7 @@ static irqreturn_t ip32_rtc_int(int irq, void *dev_id) reg_c = CMOS_READ(RTC_INTR_FLAGS); if (!(reg_c & RTC_IRQF)) { printk(KERN_WARNING - "%s: RTC IRQ without RTC_IRQF\n", __FUNCTION__); + "%s: RTC IRQ without RTC_IRQF\n", __func__); } /* Wait until interrupt goes away */ disable_irq(MACEISA_RTC_IRQ); diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c index 3f808b62924..6d31f2a98ab 100644 --- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c +++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c @@ -173,7 +173,7 @@ static const u32 toshiba_rbtx4927_irq_debug_flag = { \ char tmp[100]; \ sprintf( tmp, str ); \ - printk( "%s(%s:%u)::%s", __FUNCTION__, __FILE__, __LINE__, tmp ); \ + printk( "%s(%s:%u)::%s", __func__, __FILE__, __LINE__, tmp ); \ } #else #define TOSHIBA_RBTX4927_IRQ_DPRINTK(flag, str...) diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c index e466e5e711d..2203c77b2ce 100644 --- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c +++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c @@ -93,7 +93,7 @@ static const u32 toshiba_rbtx4927_setup_debug_flag = { \ char tmp[100]; \ sprintf( tmp, str ); \ - printk( "%s(%s:%u)::%s", __FUNCTION__, __FILE__, __LINE__, tmp ); \ + printk( "%s(%s:%u)::%s", __func__, __FILE__, __LINE__, tmp ); \ } #else #define TOSHIBA_RBTX4927_SETUP_DPRINTK(flag, str...) diff --git a/arch/mips/tx4938/common/dbgio.c b/arch/mips/tx4938/common/dbgio.c index bea59ff1842..33b9c672a32 100644 --- a/arch/mips/tx4938/common/dbgio.c +++ b/arch/mips/tx4938/common/dbgio.c @@ -31,9 +31,7 @@ * Support for TX4938 in 2.6 - Hiroshi DOYU <Hiroshi_DOYU@montavista.co.jp> */ -#include <asm/mipsregs.h> -#include <asm/system.h> -#include <asm/tx4938/tx4938_mips.h> +#include <linux/types> extern u8 txx9_sio_kdbg_rd(void); extern int txx9_sio_kdbg_wr( u8 ch ); diff --git a/arch/mips/tx4938/common/prom.c b/arch/mips/tx4938/common/prom.c index 3189a65f7d7..20baeaeba4c 100644 --- a/arch/mips/tx4938/common/prom.c +++ b/arch/mips/tx4938/common/prom.c @@ -13,13 +13,8 @@ */ #include <linux/init.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/bootmem.h> - -#include <asm/addrspace.h> -#include <asm/bootinfo.h> -#include <asm/tx4938/tx4938.h> +#include <linux/types.h> +#include <linux/io.h> static unsigned int __init tx4938_process_sdccr(u64 * addr) @@ -35,7 +30,7 @@ tx4938_process_sdccr(u64 * addr) unsigned int bc = 4; unsigned int msize = 0; - val = (*((vu64 *) (addr))); + val = ____raw_readq((void __iomem *)addr); /* MVMCP -- need #defs for these bits masks */ sdccr_ce = ((val & (1 << 10)) >> 10); diff --git a/arch/mips/tx4938/toshiba_rbtx4938/irq.c b/arch/mips/tx4938/toshiba_rbtx4938/irq.c index f00185017e8..4d6a8dc46c7 100644 --- a/arch/mips/tx4938/toshiba_rbtx4938/irq.c +++ b/arch/mips/tx4938/toshiba_rbtx4938/irq.c @@ -67,24 +67,7 @@ IRQ Device 63 RBTX4938-IOC/07 SWINT */ #include <linux/init.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/ioport.h> -#include <linux/sched.h> #include <linux/interrupt.h> -#include <linux/pci.h> -#include <linux/timex.h> -#include <asm/bootinfo.h> -#include <asm/page.h> -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/processor.h> -#include <asm/reboot.h> -#include <asm/time.h> -#include <asm/wbflush.h> -#include <linux/bootmem.h> #include <asm/tx4938/rbtx4938.h> static void toshiba_rbtx4938_irq_ioc_enable(unsigned int irq); @@ -99,21 +82,16 @@ static struct irq_chip toshiba_rbtx4938_irq_ioc_type = { .unmask = toshiba_rbtx4938_irq_ioc_enable, }; -#define TOSHIBA_RBTX4938_IOC_INTR_ENAB 0xb7f02000 -#define TOSHIBA_RBTX4938_IOC_INTR_STAT 0xb7f0200a - int toshiba_rbtx4938_irq_nested(int sw_irq) { u8 level3; - level3 = reg_rd08(TOSHIBA_RBTX4938_IOC_INTR_STAT) & 0xff; - if (level3) { + level3 = readb(rbtx4938_imstat_addr); + if (level3) /* must use fls so onboard ATA has priority */ sw_irq = TOSHIBA_RBTX4938_IRQ_IOC_BEG + fls(level3) - 1; - } - wbflush(); return sw_irq; } @@ -144,25 +122,23 @@ toshiba_rbtx4938_irq_ioc_init(void) static void toshiba_rbtx4938_irq_ioc_enable(unsigned int irq) { - volatile unsigned char v; + unsigned char v; - v = TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB); + v = readb(rbtx4938_imask_addr); v |= (1 << (irq - TOSHIBA_RBTX4938_IRQ_IOC_BEG)); - TX4938_WR08(TOSHIBA_RBTX4938_IOC_INTR_ENAB, v); + writeb(v, rbtx4938_imask_addr); mmiowb(); - TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB); } static void toshiba_rbtx4938_irq_ioc_disable(unsigned int irq) { - volatile unsigned char v; + unsigned char v; - v = TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB); + v = readb(rbtx4938_imask_addr); v &= ~(1 << (irq - TOSHIBA_RBTX4938_IRQ_IOC_BEG)); - TX4938_WR08(TOSHIBA_RBTX4938_IOC_INTR_ENAB, v); + writeb(v, rbtx4938_imask_addr); mmiowb(); - TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB); } void __init arch_init_irq(void) @@ -174,14 +150,12 @@ void __init arch_init_irq(void) /* all IRC interrupt mode are Low Active. */ /* mask all IOC interrupts */ - *rbtx4938_imask_ptr = 0; + writeb(0, rbtx4938_imask_addr); /* clear SoftInt interrupts */ - *rbtx4938_softint_ptr = 0; + writeb(0, rbtx4938_softint_addr); tx4938_irq_init(); toshiba_rbtx4938_irq_ioc_init(); /* Onboard 10M Ether: High Active */ set_irq_type(RBTX4938_IRQ_ETHER, IRQF_TRIGGER_HIGH); - - wbflush(); } diff --git a/arch/mips/tx4938/toshiba_rbtx4938/setup.c b/arch/mips/tx4938/toshiba_rbtx4938/setup.c index 61249f049cd..3a3659e8633 100644 --- a/arch/mips/tx4938/toshiba_rbtx4938/setup.c +++ b/arch/mips/tx4938/toshiba_rbtx4938/setup.c @@ -21,8 +21,8 @@ #include <linux/pm.h> #include <linux/platform_device.h> #include <linux/clk.h> +#include <linux/gpio.h> -#include <asm/wbflush.h> #include <asm/reboot.h> #include <asm/time.h> #include <asm/txx9tmr.h> @@ -34,7 +34,7 @@ #endif #include <linux/spi/spi.h> #include <asm/tx4938/spi.h> -#include <asm/gpio.h> +#include <asm/txx9pio.h> extern char * __init prom_getcmdline(void); static inline void tx4938_report_pcic_status1(struct tx4938_pcic_reg *pcicptr); @@ -90,12 +90,11 @@ void rbtx4938_machine_restart(char *command) local_irq_disable(); printk("Rebooting..."); - *rbtx4938_softresetlock_ptr = 1; - *rbtx4938_sfvol_ptr = 1; - *rbtx4938_softreset_ptr = 1; - wbflush(); - - while(1); + writeb(1, rbtx4938_softresetlock_addr); + writeb(1, rbtx4938_sfvol_addr); + writeb(1, rbtx4938_softreset_addr); + while(1) + ; } void __init @@ -487,7 +486,7 @@ static int __init tx4938_pcibios_init(void) } /* Reset PCI Bus */ - *rbtx4938_pcireset_ptr = 0; + writeb(0, rbtx4938_pcireset_addr); /* Reset PCIC */ tx4938_ccfgptr->clkctr |= TX4938_CLKCTR_PCIRST; if (txboard_pci66_mode > 0) @@ -495,8 +494,8 @@ static int __init tx4938_pcibios_init(void) mdelay(10); /* clear PCIC reset */ tx4938_ccfgptr->clkctr &= ~TX4938_CLKCTR_PCIRST; - *rbtx4938_pcireset_ptr = 1; - wbflush(); + writeb(1, rbtx4938_pcireset_addr); + mmiowb(); tx4938_report_pcic_status1(tx4938_pcicptr); tx4938_report_pciclk(); @@ -504,15 +503,15 @@ static int __init tx4938_pcibios_init(void) if (txboard_pci66_mode == 0 && txboard_pci66_check(&tx4938_pci_controller[0], 0, 0)) { /* Reset PCI Bus */ - *rbtx4938_pcireset_ptr = 0; + writeb(0, rbtx4938_pcireset_addr); /* Reset PCIC */ tx4938_ccfgptr->clkctr |= TX4938_CLKCTR_PCIRST; tx4938_pciclk66_setup(); mdelay(10); /* clear PCIC reset */ tx4938_ccfgptr->clkctr &= ~TX4938_CLKCTR_PCIRST; - *rbtx4938_pcireset_ptr = 1; - wbflush(); + writeb(1, rbtx4938_pcireset_addr); + mmiowb(); /* Reinitialize PCIC */ tx4938_report_pciclk(); tx4938_pcic_setup(tx4938_pcicptr, &tx4938_pci_controller[0], io_base[0], extarb); @@ -615,9 +614,6 @@ static void __init rbtx4938_spi_setup(void) { /* set SPI_SEL */ tx4938_ccfgptr->pcfg |= TX4938_PCFG_SPI_SEL; - /* chip selects for SPI devices */ - tx4938_pioptr->dout |= (1 << SEEPROM1_CS); - tx4938_pioptr->dir |= (1 << SEEPROM1_CS); } static struct resource rbtx4938_fpga_resource; @@ -776,12 +772,13 @@ void __init tx4938_board_setup(void) txx9_tmr_init(TX4938_TMR_REG(i) & 0xfffffffffULL); /* enable DMA */ - TX4938_WR64(0xff1fb150, TX4938_DMA_MCR_MSTEN); - TX4938_WR64(0xff1fb950, TX4938_DMA_MCR_MSTEN); + for (i = 0; i < 2; i++) + ____raw_writeq(TX4938_DMA_MCR_MSTEN, + (void __iomem *)(TX4938_DMA_REG(i) + 0x50)); /* PIO */ - tx4938_pioptr->maskcpu = 0; - tx4938_pioptr->maskext = 0; + __raw_writel(0, &tx4938_pioptr->maskcpu); + __raw_writel(0, &tx4938_pioptr->maskext); /* TX4938 internal registers */ if (request_resource(&iomem_resource, &tx4938_reg_resource)) @@ -863,10 +860,6 @@ void __init plat_mem_setup(void) if (txx9_master_clock == 0) txx9_master_clock = 25000000; /* 25MHz */ tx4938_board_setup(); - /* setup serial stuff */ - TX4938_WR(0xff1ff314, 0x00000000); /* h/w flow control off */ - TX4938_WR(0xff1ff414, 0x00000000); /* h/w flow control off */ - #ifndef CONFIG_PCI set_io_port_base(RBTX4938_ETHER_BASE); #endif @@ -932,16 +925,16 @@ void __init plat_mem_setup(void) pcfg = tx4938_ccfgptr->pcfg; /* updated */ /* fixup piosel */ if ((pcfg & (TX4938_PCFG_ATA_SEL | TX4938_PCFG_NDF_SEL)) == - TX4938_PCFG_ATA_SEL) { - *rbtx4938_piosel_ptr = (*rbtx4938_piosel_ptr & 0x03) | 0x04; - } + TX4938_PCFG_ATA_SEL) + writeb((readb(rbtx4938_piosel_addr) & 0x03) | 0x04, + rbtx4938_piosel_addr); else if ((pcfg & (TX4938_PCFG_ATA_SEL | TX4938_PCFG_NDF_SEL)) == - TX4938_PCFG_NDF_SEL) { - *rbtx4938_piosel_ptr = (*rbtx4938_piosel_ptr & 0x03) | 0x08; - } - else { - *rbtx4938_piosel_ptr &= ~(0x08 | 0x04); - } + TX4938_PCFG_NDF_SEL) + writeb((readb(rbtx4938_piosel_addr) & 0x03) | 0x08, + rbtx4938_piosel_addr); + else + writeb(readb(rbtx4938_piosel_addr) & ~(0x08 | 0x04), + rbtx4938_piosel_addr); rbtx4938_fpga_resource.name = "FPGA Registers"; rbtx4938_fpga_resource.start = CPHYSADDR(RBTX4938_FPGA_REG_ADDR); @@ -950,17 +943,14 @@ void __init plat_mem_setup(void) if (request_resource(&iomem_resource, &rbtx4938_fpga_resource)) printk("request resource for fpga failed\n"); - /* disable all OnBoard I/O interrupts */ - *rbtx4938_imask_ptr = 0; - _machine_restart = rbtx4938_machine_restart; _machine_halt = rbtx4938_machine_halt; pm_power_off = rbtx4938_machine_power_off; - *rbtx4938_led_ptr = 0xff; - printk("RBTX4938 --- FPGA(Rev %02x)", *rbtx4938_fpga_rev_ptr); - printk(" DIPSW:%02x,%02x\n", - *rbtx4938_dipsw_ptr, *rbtx4938_bdipsw_ptr); + writeb(0xff, rbtx4938_led_addr); + printk(KERN_INFO "RBTX4938 --- FPGA(Rev %02x) DIPSW:%02x,%02x\n", + readb(rbtx4938_fpga_rev_addr), + readb(rbtx4938_dipsw_addr), readb(rbtx4938_bdipsw_addr)); } static int __init rbtx4938_ne_init(void) @@ -984,106 +974,48 @@ device_initcall(rbtx4938_ne_init); /* GPIO support */ -static DEFINE_SPINLOCK(rbtx4938_spi_gpio_lock); - -static void rbtx4938_spi_gpio_set(unsigned gpio, int value) +int gpio_to_irq(unsigned gpio) { - u8 val; - unsigned long flags; - gpio -= 16; - spin_lock_irqsave(&rbtx4938_spi_gpio_lock, flags); - val = *rbtx4938_spics_ptr; - if (value) - val |= 1 << gpio; - else - val &= ~(1 << gpio); - *rbtx4938_spics_ptr = val; - mmiowb(); - spin_unlock_irqrestore(&rbtx4938_spi_gpio_lock, flags); + return -EINVAL; } -static int rbtx4938_spi_gpio_dir_out(unsigned gpio, int value) +int irq_to_gpio(unsigned irq) { - rbtx4938_spi_gpio_set(gpio, value); - return 0; + return -EINVAL; } -static DEFINE_SPINLOCK(tx4938_gpio_lock); - -static int tx4938_gpio_get(unsigned gpio) -{ - return tx4938_pioptr->din & (1 << gpio); -} +static DEFINE_SPINLOCK(rbtx4938_spi_gpio_lock); -static void tx4938_gpio_set_raw(unsigned gpio, int value) +static void rbtx4938_spi_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { - u32 val; - val = tx4938_pioptr->dout; + u8 val; + unsigned long flags; + spin_lock_irqsave(&rbtx4938_spi_gpio_lock, flags); + val = readb(rbtx4938_spics_addr); if (value) - val |= 1 << gpio; + val |= 1 << offset; else - val &= ~(1 << gpio); - tx4938_pioptr->dout = val; -} - -static void tx4938_gpio_set(unsigned gpio, int value) -{ - unsigned long flags; - spin_lock_irqsave(&tx4938_gpio_lock, flags); - tx4938_gpio_set_raw(gpio, value); - mmiowb(); - spin_unlock_irqrestore(&tx4938_gpio_lock, flags); -} - -static int tx4938_gpio_dir_in(unsigned gpio) -{ - spin_lock_irq(&tx4938_gpio_lock); - tx4938_pioptr->dir &= ~(1 << gpio); + val &= ~(1 << offset); + writeb(val, rbtx4938_spics_addr); mmiowb(); - spin_unlock_irq(&tx4938_gpio_lock); - return 0; -} - -static int tx4938_gpio_dir_out(unsigned int gpio, int value) -{ - spin_lock_irq(&tx4938_gpio_lock); - tx4938_gpio_set_raw(gpio, value); - tx4938_pioptr->dir |= 1 << gpio; - mmiowb(); - spin_unlock_irq(&tx4938_gpio_lock); - return 0; -} - -int gpio_direction_input(unsigned gpio) -{ - if (gpio < 16) - return tx4938_gpio_dir_in(gpio); - return -EINVAL; -} - -int gpio_direction_output(unsigned gpio, int value) -{ - if (gpio < 16) - return tx4938_gpio_dir_out(gpio, value); - if (gpio < 16 + 3) - return rbtx4938_spi_gpio_dir_out(gpio, value); - return -EINVAL; + spin_unlock_irqrestore(&rbtx4938_spi_gpio_lock, flags); } -int gpio_get_value(unsigned gpio) +static int rbtx4938_spi_gpio_dir_out(struct gpio_chip *chip, + unsigned int offset, int value) { - if (gpio < 16) - return tx4938_gpio_get(gpio); + rbtx4938_spi_gpio_set(chip, offset, value); return 0; } -void gpio_set_value(unsigned gpio, int value) -{ - if (gpio < 16) - tx4938_gpio_set(gpio, value); - else - rbtx4938_spi_gpio_set(gpio, value); -} +static struct gpio_chip rbtx4938_spi_gpio_chip = { + .set = rbtx4938_spi_gpio_set, + .direction_output = rbtx4938_spi_gpio_dir_out, + .label = "RBTX4938-SPICS", + .base = 16, + .ngpio = 3, +}; /* SPI support */ @@ -1094,7 +1026,6 @@ static void __init txx9_spi_init(unsigned long base, int irq) .start = base, .end = base + 0x20 - 1, .flags = IORESOURCE_MEM, - .parent = &tx4938_reg_resource, }, { .start = irq, .flags = IORESOURCE_IRQ, @@ -1118,10 +1049,25 @@ static int __init rbtx4938_spi_init(void) spi_eeprom_register(SEEPROM1_CS); spi_eeprom_register(16 + SEEPROM2_CS); spi_eeprom_register(16 + SEEPROM3_CS); + gpio_request(16 + SRTC_CS, "rtc-rs5c348"); + gpio_direction_output(16 + SRTC_CS, 0); + gpio_request(SEEPROM1_CS, "seeprom1"); + gpio_direction_output(SEEPROM1_CS, 1); + gpio_request(16 + SEEPROM2_CS, "seeprom2"); + gpio_direction_output(16 + SEEPROM2_CS, 1); + gpio_request(16 + SEEPROM3_CS, "seeprom3"); + gpio_direction_output(16 + SEEPROM3_CS, 1); txx9_spi_init(TX4938_SPI_REG & 0xfffffffffULL, RBTX4938_IRQ_IRC_SPI); return 0; } -arch_initcall(rbtx4938_spi_init); + +static int __init rbtx4938_arch_init(void) +{ + txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, 16); + gpiochip_add(&rbtx4938_spi_gpio_chip); + return rbtx4938_spi_init(); +} +arch_initcall(rbtx4938_arch_init); /* Watchdog support */ @@ -1131,7 +1077,6 @@ static int __init txx9_wdt_init(unsigned long base) .start = base, .end = base + 0x100 - 1, .flags = IORESOURCE_MEM, - .parent = &tx4938_reg_resource, }; struct platform_device *dev = platform_device_register_simple("txx9wdt", -1, &res, 1); diff --git a/arch/mips/vr41xx/common/init.c b/arch/mips/vr41xx/common/init.c index 76d4b5ed3fc..c64995342ba 100644 --- a/arch/mips/vr41xx/common/init.c +++ b/arch/mips/vr41xx/common/init.c @@ -1,7 +1,7 @@ /* * init.c, Common initialization routines for NEC VR4100 series. * - * Copyright (C) 2003-2005 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> + * Copyright (C) 2003-2008 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -53,6 +53,8 @@ void __init plat_time_init(void) void __init plat_mem_setup(void) { iomem_resource_init(); + + vr41xx_siu_setup(); } void __init prom_init(void) diff --git a/arch/mips/vr41xx/common/siu.c b/arch/mips/vr41xx/common/siu.c index b735f45b25f..654dee6208b 100644 --- a/arch/mips/vr41xx/common/siu.c +++ b/arch/mips/vr41xx/common/siu.c @@ -1,7 +1,7 @@ /* * NEC VR4100 series SIU platform device. * - * Copyright (C) 2007 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> + * Copyright (C) 2007-2008 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -118,3 +118,37 @@ err_free_device: return retval; } device_initcall(vr41xx_siu_add); + +void __init vr41xx_siu_setup(void) +{ + struct uart_port port; + struct resource *res; + unsigned int *type; + int i; + + switch (current_cpu_type()) { + case CPU_VR4111: + case CPU_VR4121: + type = siu_type1_ports; + res = siu_type1_resource; + break; + case CPU_VR4122: + case CPU_VR4131: + case CPU_VR4133: + type = siu_type2_ports; + res = siu_type2_resource; + break; + default: + return; + } + + for (i = 0; i < SIU_PORTS_MAX; i++) { + port.line = i; + port.type = type[i]; + if (port.type == PORT_UNKNOWN) + break; + port.mapbase = res[i].start; + port.membase = (unsigned char __iomem *)KSEG1ADDR(res[i].start); + vr41xx_siu_early_setup(&port); + } +} diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index eb80f5e33d7..1f012843150 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -603,15 +603,18 @@ void show_mem(void) #ifdef CONFIG_DISCONTIGMEM { struct zonelist *zl; - int i, j, k; + int i, j; for (i = 0; i < npmem_ranges; i++) { + zl = node_zonelist(i); for (j = 0; j < MAX_NR_ZONES; j++) { - zl = NODE_DATA(i)->node_zonelists + j; + struct zoneref *z; + struct zone *zone; printk("Zone list for zone %d on node %d: ", j, i); - for (k = 0; zl->zones[k] != NULL; k++) - printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name); + for_each_zone_zonelist(zone, z, zl, j) + printk("[%d/%s] ", zone_to_nid(zone), + zone->name); printk("\n"); } } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4bb2e9310a5..4e40c122bf2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -626,20 +626,6 @@ config ADVANCED_OPTIONS comment "Default settings for advanced configuration options are used" depends on !ADVANCED_OPTIONS -config HIGHMEM_START_BOOL - bool "Set high memory pool address" - depends on ADVANCED_OPTIONS && HIGHMEM - help - This option allows you to set the base address of the kernel virtual - area used to map high memory pages. This can be useful in - optimizing the layout of kernel virtual memory. - - Say N here unless you know what you are doing. - -config HIGHMEM_START - hex "Virtual start address of high memory pool" if HIGHMEM_START_BOOL - default "0xfe000000" - config LOWMEM_SIZE_BOOL bool "Set maximum low memory" depends on ADVANCED_OPTIONS @@ -656,21 +642,76 @@ config LOWMEM_SIZE hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL default "0x30000000" +config RELOCATABLE + bool "Build a relocatable kernel (EXPERIMENTAL)" + depends on EXPERIMENTAL && ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE + help + This builds a kernel image that is capable of running at the + location the kernel is loaded at (some alignment restrictions may + exist). + + One use is for the kexec on panic case where the recovery kernel + must live at a different physical address than the primary + kernel. + + Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address + it has been loaded at and the compile time physical addresses + CONFIG_PHYSICAL_START is ignored. However CONFIG_PHYSICAL_START + setting can still be useful to bootwrappers that need to know the + load location of the kernel (eg. u-boot/mkimage). + +config PAGE_OFFSET_BOOL + bool "Set custom page offset address" + depends on ADVANCED_OPTIONS + help + This option allows you to set the kernel virtual address at which + the kernel will map low memory. This can be useful in optimizing + the virtual memory layout of the system. + + Say N here unless you know what you are doing. + +config PAGE_OFFSET + hex "Virtual address of memory base" if PAGE_OFFSET_BOOL + default "0xc0000000" + config KERNEL_START_BOOL bool "Set custom kernel base address" depends on ADVANCED_OPTIONS help This option allows you to set the kernel virtual address at which - the kernel will map low memory (the kernel image will be linked at - this address). This can be useful in optimizing the virtual memory - layout of the system. + the kernel will be loaded. Normally this should match PAGE_OFFSET + however there are times (like kdump) that one might not want them + to be the same. Say N here unless you know what you are doing. config KERNEL_START hex "Virtual address of kernel base" if KERNEL_START_BOOL + default PAGE_OFFSET if PAGE_OFFSET_BOOL + default "0xc2000000" if CRASH_DUMP default "0xc0000000" +config PHYSICAL_START_BOOL + bool "Set physical address where the kernel is loaded" + depends on ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE + help + This gives the physical address where the kernel is loaded. + + Say N here unless you know what you are doing. + +config PHYSICAL_START + hex "Physical address where the kernel is loaded" if PHYSICAL_START_BOOL + default "0x02000000" if PPC_STD_MMU && CRASH_DUMP + default "0x00000000" + +config PHYSICAL_ALIGN + hex + default "0x10000000" if FSL_BOOKE + help + This value puts the alignment restrictions on physical address + where kernel is loaded and run from. Kernel is compiled for an + address which meets above alignment restriction. + config TASK_SIZE_BOOL bool "Set custom user task size" depends on ADVANCED_OPTIONS @@ -717,9 +758,17 @@ config PIN_TLB endmenu if PPC64 +config PAGE_OFFSET + hex + default "0xc000000000000000" config KERNEL_START hex + default "0xc000000002000000" if CRASH_DUMP default "0xc000000000000000" +config PHYSICAL_START + hex + default "0x02000000" if CRASH_DUMP + default "0x00000000" endif source "net/Kconfig" @@ -754,3 +803,4 @@ config PPC_CLOCK config PPC_LIB_RHEAP bool +source "arch/powerpc/kvm/Kconfig" diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index a86d8d85321..807a2dce626 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -151,6 +151,9 @@ config BOOTX_TEXT config PPC_EARLY_DEBUG bool "Early debugging (dangerous)" + # PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water + # mark, which doesn't work with current 440 KVM. + depends on !KVM help Say Y to enable some early debugging facilities that may be available for your processor/board combination. Those facilities are hacks diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index e2ec4a91cce..9dcdc036cdf 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -145,6 +145,7 @@ core-y += arch/powerpc/kernel/ \ arch/powerpc/platforms/ core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/ core-$(CONFIG_XMON) += arch/powerpc/xmon/ +core-$(CONFIG_KVM) += arch/powerpc/kvm/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 5ef2bdf8d18..2347294ff35 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -27,6 +27,7 @@ zImage.chrp zImage.coff zImage.coff.lds zImage.ep* +zImage.iseries zImage.*lds zImage.miboot zImage.pmac diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 5ba50c67339..7822d25c9d3 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -40,7 +40,7 @@ $(obj)/ebony.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-walnut.o: BOOTCFLAGS += -mcpu=405 -$(obj)/virtex405-head.o: BOOTCFLAGS += -mcpu=405 +$(obj)/virtex405-head.o: BOOTAFLAGS += -mcpu=405 zlib := inffast.c inflate.c inftrees.c diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts index 6f3d38a1554..39634124929 100644 --- a/arch/powerpc/boot/dts/canyonlands.dts +++ b/arch/powerpc/boot/dts/canyonlands.dts @@ -142,8 +142,45 @@ #address-cells = <2>; #size-cells = <1>; clock-frequency = <0>; /* Filled in by U-Boot */ + /* ranges property is supplied by U-Boot */ interrupts = <6 4>; interrupt-parent = <&UIC1>; + + nor_flash@0,0 { + compatible = "amd,s29gl512n", "cfi-flash"; + bank-width = <2>; + reg = <0 000000 4000000>; + #address-cells = <1>; + #size-cells = <1>; + partition@0 { + label = "kernel"; + reg = <0 1e0000>; + }; + partition@1e0000 { + label = "dtb"; + reg = <1e0000 20000>; + }; + partition@200000 { + label = "ramdisk"; + reg = <200000 1400000>; + }; + partition@1600000 { + label = "jffs2"; + reg = <1600000 400000>; + }; + partition@1a00000 { + label = "user"; + reg = <1a00000 2560000>; + }; + partition@3f60000 { + label = "env"; + reg = <3f60000 40000>; + }; + partition@3fa0000 { + label = "u-boot"; + reg = <3fa0000 60000>; + }; + }; }; UART0: serial@ef600300 { diff --git a/arch/powerpc/boot/dts/glacier.dts b/arch/powerpc/boot/dts/glacier.dts index 958a5ca53d3..0f2fc077d8d 100644 --- a/arch/powerpc/boot/dts/glacier.dts +++ b/arch/powerpc/boot/dts/glacier.dts @@ -145,8 +145,45 @@ #address-cells = <2>; #size-cells = <1>; clock-frequency = <0>; /* Filled in by U-Boot */ + /* ranges property is supplied by U-Boot */ interrupts = <6 4>; interrupt-parent = <&UIC1>; + + nor_flash@0,0 { + compatible = "amd,s29gl512n", "cfi-flash"; + bank-width = <2>; + reg = <0 000000 4000000>; + #address-cells = <1>; + #size-cells = <1>; + partition@0 { + label = "kernel"; + reg = <0 1e0000>; + }; + partition@1e0000 { + label = "dtb"; + reg = <1e0000 20000>; + }; + partition@200000 { + label = "ramdisk"; + reg = <200000 1400000>; + }; + partition@1600000 { + label = "jffs2"; + reg = <1600000 400000>; + }; + partition@1a00000 { + label = "user"; + reg = <1a00000 2560000>; + }; + partition@3f60000 { + label = "env"; + reg = <3f60000 40000>; + }; + partition@3fa0000 { + label = "u-boot"; + reg = <3fa0000 60000>; + }; + }; }; UART0: serial@ef600300 { diff --git a/arch/powerpc/boot/dts/mpc8610_hpcd.dts b/arch/powerpc/boot/dts/mpc8610_hpcd.dts index 16c947b8a72..1f2f1e0a557 100644 --- a/arch/powerpc/boot/dts/mpc8610_hpcd.dts +++ b/arch/powerpc/boot/dts/mpc8610_hpcd.dts @@ -45,6 +45,11 @@ reg = <0x00000000 0x20000000>; // 512M at 0x0 }; + board-control@e8000000 { + compatible = "fsl,fpga-pixis"; + reg = <0xe8000000 32>; // pixis at 0xe8000000 + }; + soc@e0000000 { #address-cells = <1>; #size-cells = <1>; @@ -104,6 +109,13 @@ interrupt-parent = <&mpic>; }; + display@2c000 { + compatible = "fsl,diu"; + reg = <0x2c000 100>; + interrupts = <72 2>; + interrupt-parent = <&mpic>; + }; + mpic: interrupt-controller@40000 { clock-frequency = <0>; interrupt-controller; diff --git a/arch/powerpc/boot/ns16550.c b/arch/powerpc/boot/ns16550.c index aef3bdc8916..8c9ead94be0 100644 --- a/arch/powerpc/boot/ns16550.c +++ b/arch/powerpc/boot/ns16550.c @@ -55,10 +55,15 @@ static u8 ns16550_tstc(void) int ns16550_console_init(void *devp, struct serial_console_data *scdp) { int n; + u32 reg_offset; if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) return -1; + n = getprop(devp, "reg-offset", ®_offset, sizeof(reg_offset)); + if (n == sizeof(reg_offset)) + reg_base += reg_offset; + n = getprop(devp, "reg-shift", ®_shift, sizeof(reg_shift)); if (n != sizeof(reg_shift)) reg_shift = 0; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ce1e8d24e74..9177b21b1a9 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -106,4 +106,13 @@ PHONY += systbl_chk systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i $(call cmd,systbl_chk) +$(obj)/built-in.o: prom_init_check + +quiet_cmd_prom_init_check = CALL $< + cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o" + +PHONY += prom_init_check +prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o + $(call cmd,prom_init_check) + clean-files := vmlinux.lds diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 292c6d8db0e..62134845af0 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -23,6 +23,9 @@ #include <linux/mm.h> #include <linux/suspend.h> #include <linux/hrtimer.h> +#ifdef CONFIG_KVM +#include <linux/kvm_host.h> +#endif #ifdef CONFIG_PPC64 #include <linux/time.h> #include <linux/hardirq.h> @@ -93,10 +96,7 @@ int main(void) DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); -#ifdef CONFIG_PPC32 - DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); -#endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); @@ -165,13 +165,9 @@ int main(void) /* Interrupt register frame */ DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); -#ifndef CONFIG_PPC64 - DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); -#else /* CONFIG_PPC64 */ + DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); +#ifdef CONFIG_PPC64 DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); - /* 288 = # of volatile regs, int & fp, for leaf routines */ - /* which do not stack a frame. See the PPC64 ABI. */ - DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288); /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); @@ -331,5 +327,30 @@ int main(void) DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); +#ifdef CONFIG_KVM + DEFINE(TLBE_BYTES, sizeof(struct tlbe)); + + DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); + DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); + DEFINE(VCPU_HOST_TLB, offsetof(struct kvm_vcpu, arch.host_tlb)); + DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); + DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); + DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); + DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); + DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); + DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); + DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); + DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); + DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); + DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); + DEFINE(VCPU_PID, offsetof(struct kvm_vcpu, arch.pid)); + + DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); + DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); + DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S index 5465e8de0e6..e3623e3e345 100644 --- a/arch/powerpc/kernel/cpu_setup_44x.S +++ b/arch/powerpc/kernel/cpu_setup_44x.S @@ -33,7 +33,6 @@ _GLOBAL(__setup_cpu_440grx) mtlr r4 blr _GLOBAL(__setup_cpu_460ex) -_GLOBAL(__setup_cpu_460gt) b __init_fpu_44x _GLOBAL(__setup_cpu_440gx) _GLOBAL(__setup_cpu_440spe) diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f1ee0b3f78f..72d1d739525 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -17,7 +17,13 @@ #include <asm/cache.h> _GLOBAL(__setup_cpu_603) - b setup_common_caches + mflr r4 +BEGIN_FTR_SECTION + bl __init_fpu_registers +END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE) + bl setup_common_caches + mtlr r4 + blr _GLOBAL(__setup_cpu_604) mflr r4 bl setup_common_caches diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 26ffb44e270..36080d4d192 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -37,7 +37,6 @@ extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec); @@ -1416,10 +1415,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_value = 0x13020000, .cpu_name = "460GT", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, .dcache_bsize = 32, - .cpu_setup = __setup_cpu_460gt, .machine_check = machine_check_440A, .platform = "ppc440", }, diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 4ff74414356..e581524d85b 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -371,6 +371,17 @@ skpinv: addi r6,r6,1 /* Increment */ bl early_init +#ifdef CONFIG_RELOCATABLE + lis r3,kernstart_addr@ha + la r3,kernstart_addr@l(r3) +#ifdef CONFIG_PHYS_64BIT + stw r23,0(r3) + stw r25,4(r3) +#else + stw r25,0(r3) +#endif +#endif + mfspr r3,SPRN_TLB1CFG andi. r3,r3,0xfff lis r4,num_tlbcam_entries@ha diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 9d2c56621f1..92ccc6fcc5b 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -152,7 +152,7 @@ _GLOBAL(low_choose_750fx_pll) mtspr SPRN_HID1,r4 /* Store new HID1 image */ - rlwinm r6,r1,0,0,18 + rlwinm r6,r1,0,0,(31-THREAD_SHIFT) lwz r6,TI_CPU(r6) slwi r6,r6,2 addis r6,r6,nap_save_hid1@ha @@ -281,7 +281,7 @@ _GLOBAL(_tlbia) #endif /* CONFIG_SMP */ #else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */ #if defined(CONFIG_SMP) - rlwinm r8,r1,0,0,18 + rlwinm r8,r1,0,0,(31-THREAD_SHIFT) lwz r8,TI_CPU(r8) oris r8,r8,10 mfmsr r10 @@ -377,7 +377,7 @@ _GLOBAL(_tlbie) #endif /* CONFIG_SMP */ #else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */ #if defined(CONFIG_SMP) - rlwinm r8,r1,0,0,18 + rlwinm r8,r1,0,0,(31-THREAD_SHIFT) lwz r8,TI_CPU(r8) oris r8,r8,11 mfmsr r10 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index a3c491e88a7..942951e7658 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -27,23 +27,11 @@ .text -_GLOBAL(get_msr) - mfmsr r3 - blr - -_GLOBAL(get_srr0) - mfsrr0 r3 - blr - -_GLOBAL(get_srr1) - mfsrr1 r3 - blr - #ifdef CONFIG_IRQSTACKS _GLOBAL(call_do_softirq) mflr r0 std r0,16(r1) - stdu r1,THREAD_SIZE-112(r3) + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 bl .__do_softirq ld r1,0(r1) @@ -56,7 +44,7 @@ _GLOBAL(call_handle_irq) mflr r0 std r0,16(r1) mtctr r8 - stdu r1,THREAD_SIZE-112(r5) + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) mr r1,r5 bctrl ld r1,0(r1) @@ -599,7 +587,7 @@ _GLOBAL(kexec_sequence) std r0,16(r1) /* switch stacks to newstack -- &kexec_stack.stack */ - stdu r1,THREAD_SIZE-112(r3) + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 li r0,0 @@ -616,7 +604,7 @@ _GLOBAL(kexec_sequence) std r26,-48(r1) std r25,-56(r1) - stdu r1,-112-64(r1) + stdu r1,-STACK_FRAME_OVERHEAD-64(r1) /* save args into preserved regs */ mr r31,r3 /* newstack (both) */ diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index fb698d47082..e79ad8afda0 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -275,6 +275,8 @@ static int __devinit of_pci_phb_probe(struct of_device *dev, /* Scan the bus */ scan_phb(phb); + if (phb->bus == NULL) + return -ENXIO; /* Claim resources. This might need some rework as well depending * wether we are doing probe-only or not, like assigning unassigned diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index ac163bd46cf..c9bf17eec31 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -7,17 +7,11 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/types.h> #include <linux/threads.h> #include <linux/module.h> -#include <asm/processor.h> -#include <asm/ptrace.h> -#include <asm/page.h> #include <asm/lppaca.h> #include <asm/paca.h> -#include <asm/mmu.h> - /* This symbol is provided by the linker - let it fill in the paca * field correctly */ @@ -65,60 +59,29 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = { * processors. The processor VPD array needs one entry per physical * processor (not thread). */ -#define PACA_INIT(number) \ -{ \ - .lppaca_ptr = &lppaca[number], \ - .lock_token = 0x8000, \ - .paca_index = (number), /* Paca Index */ \ - .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \ - .hw_cpu_id = 0xffff, \ - .slb_shadow_ptr = &slb_shadow[number], \ - .__current = &init_task, \ -} - -struct paca_struct paca[] = { - PACA_INIT(0), -#if NR_CPUS > 1 - PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3), -#if NR_CPUS > 4 - PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7), -#if NR_CPUS > 8 - PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11), - PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15), - PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19), - PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23), - PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27), - PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31), -#if NR_CPUS > 32 - PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35), - PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39), - PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43), - PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47), - PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51), - PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55), - PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59), - PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63), -#if NR_CPUS > 64 - PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67), - PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71), - PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75), - PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79), - PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83), - PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87), - PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91), - PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95), - PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99), - PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103), - PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107), - PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111), - PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115), - PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119), - PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123), - PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127), -#endif -#endif -#endif -#endif -#endif -}; +struct paca_struct paca[NR_CPUS]; EXPORT_SYMBOL(paca); + +void __init initialise_pacas(void) +{ + int cpu; + + /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB + * of the TOC can be addressed using a single machine instruction. + */ + unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; + + /* Can't use for_each_*_cpu, as they aren't functional yet */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct paca_struct *new_paca = &paca[cpu]; + + new_paca->lppaca_ptr = &lppaca[cpu]; + new_paca->lock_token = 0x8000; + new_paca->paca_index = cpu; + new_paca->kernel_toc = kernel_toc; + new_paca->hw_cpu_id = 0xffff; + new_paca->slb_shadow_ptr = &slb_shadow[cpu]; + new_paca->__current = &init_task; + + } +} diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h index fda05e2211d..90e56277179 100644 --- a/arch/powerpc/kernel/ppc32.h +++ b/arch/powerpc/kernel/ppc32.h @@ -135,6 +135,4 @@ struct ucontext32 { struct mcontext32 uc_mcontext; }; -extern int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s); - #endif /* _PPC64_PPC32_H */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 703100d5e45..6caad17ea72 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1033,3 +1033,34 @@ void ppc64_runlatch_off(void) } } #endif + +#if THREAD_SHIFT < PAGE_SHIFT + +static struct kmem_cache *thread_info_cache; + +struct thread_info *alloc_thread_info(struct task_struct *tsk) +{ + struct thread_info *ti; + + ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL); + if (unlikely(ti == NULL)) + return NULL; +#ifdef CONFIG_DEBUG_STACK_USAGE + memset(ti, 0, THREAD_SIZE); +#endif + return ti; +} + +void free_thread_info(struct thread_info *ti) +{ + kmem_cache_free(thread_info_cache, ti); +} + +void thread_info_cache_init(void) +{ + thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, + THREAD_SIZE, 0, NULL); + BUG_ON(thread_info_cache == NULL); +} + +#endif /* THREAD_SHIFT < PAGE_SHIFT */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 3bfe7837e82..2aefe2a4129 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -53,6 +53,7 @@ #include <asm/pci-bridge.h> #include <asm/phyp_dump.h> #include <asm/kexec.h> +#include <mm/mmu_decl.h> #ifdef DEBUG #define DBG(fmt...) printk(KERN_ERR fmt) @@ -978,7 +979,10 @@ static int __init early_init_dt_scan_memory(unsigned long node, } #endif lmb_add(base, size); + + memstart_addr = min((u64)memstart_addr, base); } + return 0; } diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh new file mode 100644 index 00000000000..8e24fc1821e --- /dev/null +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -0,0 +1,58 @@ +#!/bin/sh +# +# Copyright © 2008 IBM Corporation +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version +# 2 of the License, or (at your option) any later version. + +# This script checks prom_init.o to see what external symbols it +# is using, if it finds symbols not in the whitelist it returns +# an error. The point of this is to discourage people from +# intentionally or accidentally adding new code to prom_init.c +# which has side effects on other parts of the kernel. + +# If you really need to reference something from prom_init.o add +# it to the list below: + +WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush +_end enter_prom memcpy memset reloc_offset __secondary_hold +__secondary_hold_acknowledge __secondary_hold_spinloop __start +strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 +reloc_got2" + +NM="$1" +OBJ="$2" + +ERROR=0 + +for UNDEF in $($NM -u $OBJ | awk '{print $2}') +do + # On 64-bit nm gives us the function descriptors, which have + # a leading . on the name, so strip it off here. + UNDEF="${UNDEF#.}" + + if [ $KBUILD_VERBOSE ]; then + if [ $KBUILD_VERBOSE -ne 0 ]; then + echo "Checking prom_init.o symbol '$UNDEF'" + fi + fi + + OK=0 + for WHITE in $WHITELIST + do + if [ "$UNDEF" = "$WHITE" ]; then + OK=1 + break + fi + done + + if [ $OK -eq 0 ]; then + ERROR=1 + echo "Error: External symbol '$UNDEF' referenced" \ + "from prom_init.c" >&2 + fi +done + +exit $ERROR diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 9d30e10970a..4c1de6af4c0 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -29,15 +29,12 @@ #include <linux/security.h> #include <linux/signal.h> #include <linux/compat.h> -#include <linux/elf.h> #include <asm/uaccess.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/system.h> -#include "ppc32.h" - /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -67,27 +64,6 @@ static long compat_ptrace_old(struct task_struct *child, long request, return -EPERM; } -static int compat_ptrace_getsiginfo(struct task_struct *child, compat_siginfo_t __user *data) -{ - siginfo_t lastinfo; - int error = -ESRCH; - - read_lock(&tasklist_lock); - if (likely(child->sighand != NULL)) { - error = -EINVAL; - spin_lock_irq(&child->sighand->siglock); - if (likely(child->last_siginfo != NULL)) { - lastinfo = *child->last_siginfo; - error = 0; - } - spin_unlock_irq(&child->sighand->siglock); - } - read_unlock(&tasklist_lock); - if (!error) - return copy_siginfo_to_user32(data, &lastinfo); - return error; -} - long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { @@ -306,9 +282,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, 0, PT_REGS_COUNT * sizeof(compat_long_t), compat_ptr(data)); - case PTRACE_GETSIGINFO: - return compat_ptrace_getsiginfo(child, compat_ptr(data)); - case PTRACE_GETFPREGS: case PTRACE_SETFPREGS: case PTRACE_GETVRREGS: diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 31ada9fdfc5..dff6308d1b5 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -170,6 +170,9 @@ void __init setup_paca(int cpu) void __init early_setup(unsigned long dt_ptr) { + /* Fill in any unititialised pacas */ + initialise_pacas(); + /* Identify CPU type */ identify_cpu(0, mfspr(SPRN_PVR)); @@ -435,7 +438,7 @@ void __init setup_system(void) printk("htab_address = 0x%p\n", htab_address); printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); #if PHYSICAL_START > 0 - printk("physical_start = 0x%x\n", PHYSICAL_START); + printk("physical_start = 0x%lx\n", PHYSICAL_START); #endif printk("-----------------------------------------------------\n"); diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index e3638eeaaae..96294403843 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -13,7 +13,6 @@ #include <linux/sched.h> #include <linux/stacktrace.h> #include <asm/ptrace.h> -#include <asm/asm-offsets.h> /* * Save stack-backtrace addresses into a stack_trace buffer. diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 7aad6203e41..7d6c9bb8c77 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -154,8 +154,8 @@ static void udbg_console_write(struct console *con, const char *s, static struct console udbg_console = { .name = "udbg", .write = udbg_console_write, - .flags = CON_PRINTBUFFER | CON_ENABLED | CON_BOOT, - .index = -1, + .flags = CON_PRINTBUFFER | CON_ENABLED | CON_BOOT | CON_ANYTIME, + .index = 0, }; static int early_console_initialized; diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c new file mode 100644 index 00000000000..f5d7a5eab96 --- /dev/null +++ b/arch/powerpc/kvm/44x_tlb.c @@ -0,0 +1,224 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm_host.h> +#include <linux/highmem.h> +#include <asm/mmu-44x.h> +#include <asm/kvm_ppc.h> + +#include "44x_tlb.h" + +#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) +#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) + +static unsigned int kvmppc_tlb_44x_pos; + +static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) +{ + /* Mask off reserved bits. */ + attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; + + if (!usermode) { + /* Guest is in supervisor mode, so we need to translate guest + * supervisor permissions into user permissions. */ + attrib &= ~PPC44x_TLB_USER_PERM_MASK; + attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3; + } + + /* Make sure host can always access this memory. */ + attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; + + return attrib; +} + +/* Search the guest TLB for a matching entry. */ +int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, + unsigned int as) +{ + int i; + + /* XXX Replace loop with fancy data structures. */ + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; + unsigned int tid; + + if (eaddr < get_tlb_eaddr(tlbe)) + continue; + + if (eaddr > get_tlb_end(tlbe)) + continue; + + tid = get_tlb_tid(tlbe); + if (tid && (tid != pid)) + continue; + + if (!get_tlb_v(tlbe)) + continue; + + if (get_tlb_ts(tlbe) != as) + continue; + + return i; + } + + return -1; +} + +struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int index; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + if (index == -1) + return NULL; + return &vcpu->arch.guest_tlb[index]; +} + +struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int index; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + if (index == -1) + return NULL; + return &vcpu->arch.guest_tlb[index]; +} + +static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) +{ + return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); +} + +/* Must be called with mmap_sem locked for writing. */ +static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, + unsigned int index) +{ + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index]; + struct page *page = vcpu->arch.shadow_pages[index]; + + kunmap(vcpu->arch.shadow_pages[index]); + + if (get_tlb_v(stlbe)) { + if (kvmppc_44x_tlbe_is_writable(stlbe)) + kvm_release_page_dirty(page); + else + kvm_release_page_clean(page); + } +} + +/* Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. */ +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, + u32 flags) +{ + struct page *new_page; + struct tlbe *stlbe; + hpa_t hpaddr; + unsigned int victim; + + /* Future optimization: don't overwrite the TLB entry containing the + * current PC (or stack?). */ + victim = kvmppc_tlb_44x_pos++; + if (kvmppc_tlb_44x_pos > tlb_44x_hwater) + kvmppc_tlb_44x_pos = 0; + stlbe = &vcpu->arch.shadow_tlb[victim]; + + /* Get reference to new page. */ + down_write(¤t->mm->mmap_sem); + new_page = gfn_to_page(vcpu->kvm, gfn); + if (is_error_page(new_page)) { + printk(KERN_ERR "Couldn't get guest page!\n"); + kvm_release_page_clean(new_page); + return; + } + hpaddr = page_to_phys(new_page); + + /* Drop reference to old page. */ + kvmppc_44x_shadow_release(vcpu, victim); + up_write(¤t->mm->mmap_sem); + + vcpu->arch.shadow_pages[victim] = new_page; + + /* XXX Make sure (va, size) doesn't overlap any other + * entries. 440x6 user manual says the result would be + * "undefined." */ + + /* XXX what about AS? */ + + stlbe->tid = asid & 0xff; + + /* Force TS=1 for all guest mappings. */ + /* For now we hardcode 4KB mappings, but it will be important to + * use host large pages in the future. */ + stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS + | PPC44x_TLB_4K; + + stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); + stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, + vcpu->arch.msr & MSR_PR); +} + +void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid) +{ + unsigned int pid = asid & 0xff; + int i; + + /* XXX Replace loop with fancy data structures. */ + down_write(¤t->mm->mmap_sem); + for (i = 0; i <= tlb_44x_hwater; i++) { + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; + unsigned int tid; + + if (!get_tlb_v(stlbe)) + continue; + + if (eaddr < get_tlb_eaddr(stlbe)) + continue; + + if (eaddr > get_tlb_end(stlbe)) + continue; + + tid = get_tlb_tid(stlbe); + if (tid && (tid != pid)) + continue; + + kvmppc_44x_shadow_release(vcpu, i); + stlbe->word0 = 0; + } + up_write(¤t->mm->mmap_sem); +} + +/* Invalidate all mappings, so that when they fault back in they will get the + * proper permission bits. */ +void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +{ + int i; + + /* XXX Replace loop with fancy data structures. */ + down_write(¤t->mm->mmap_sem); + for (i = 0; i <= tlb_44x_hwater; i++) { + kvmppc_44x_shadow_release(vcpu, i); + vcpu->arch.shadow_tlb[i].word0 = 0; + } + up_write(¤t->mm->mmap_sem); +} diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h new file mode 100644 index 00000000000..2ccd46b6f6b --- /dev/null +++ b/arch/powerpc/kvm/44x_tlb.h @@ -0,0 +1,91 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __KVM_POWERPC_TLB_H__ +#define __KVM_POWERPC_TLB_H__ + +#include <linux/kvm_host.h> +#include <asm/mmu-44x.h> + +extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, + unsigned int pid, unsigned int as); +extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); +extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); + +/* TLB helper functions */ +static inline unsigned int get_tlb_size(const struct tlbe *tlbe) +{ + return (tlbe->word0 >> 4) & 0xf; +} + +static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) +{ + return tlbe->word0 & 0xfffffc00; +} + +static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) +{ + unsigned int pgsize = get_tlb_size(tlbe); + return 1 << 10 << (pgsize << 1); +} + +static inline gva_t get_tlb_end(const struct tlbe *tlbe) +{ + return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; +} + +static inline u64 get_tlb_raddr(const struct tlbe *tlbe) +{ + u64 word1 = tlbe->word1; + return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); +} + +static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) +{ + return tlbe->tid & 0xff; +} + +static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) +{ + return (tlbe->word0 >> 8) & 0x1; +} + +static inline unsigned int get_tlb_v(const struct tlbe *tlbe) +{ + return (tlbe->word0 >> 9) & 0x1; +} + +static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mmucr & 0xff; +} + +static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.mmucr >> 16) & 0x1; +} + +static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) +{ + unsigned int pgmask = get_tlb_bytes(tlbe) - 1; + + return get_tlb_raddr(tlbe) | (eaddr & pgmask); +} + +#endif /* __KVM_POWERPC_TLB_H__ */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig new file mode 100644 index 00000000000..6b076010213 --- /dev/null +++ b/arch/powerpc/kvm/Kconfig @@ -0,0 +1,42 @@ +# +# KVM configuration +# + +menuconfig VIRTUALIZATION + bool "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + bool "Kernel-based Virtual Machine (KVM) support" + depends on 44x && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + # We can only run on Book E hosts so far + select KVM_BOOKE_HOST + ---help--- + Support hosting virtualized guest machines. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + +config KVM_BOOKE_HOST + bool "KVM host support for Book E PowerPC processors" + depends on KVM && 44x + ---help--- + Provides host support for KVM on Book E PowerPC processors. Currently + this works on 440 processors only. + +source drivers/virtio/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile new file mode 100644 index 00000000000..d0d358d367e --- /dev/null +++ b/arch/powerpc/kvm/Makefile @@ -0,0 +1,15 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm + +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) + +kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o +obj-$(CONFIG_KVM) += kvm.o + +AFLAGS_booke_interrupts.o := -I$(obj) + +kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o +obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c new file mode 100644 index 00000000000..6d9884a6884 --- /dev/null +++ b/arch/powerpc/kvm/booke_guest.c @@ -0,0 +1,615 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <asm/cputable.h> +#include <asm/uaccess.h> +#include <asm/kvm_ppc.h> + +#include "44x_tlb.h" + +#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { "exits", VCPU_STAT(sum_exits) }, + { "mmio", VCPU_STAT(mmio_exits) }, + { "dcr", VCPU_STAT(dcr_exits) }, + { "sig", VCPU_STAT(signal_exits) }, + { "light", VCPU_STAT(light_exits) }, + { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, + { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, + { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, + { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) }, + { "sysc", VCPU_STAT(syscall_exits) }, + { "isi", VCPU_STAT(isi_exits) }, + { "dsi", VCPU_STAT(dsi_exits) }, + { "inst_emu", VCPU_STAT(emulated_inst_exits) }, + { "dec", VCPU_STAT(dec_exits) }, + { "ext_intr", VCPU_STAT(ext_intr_exits) }, + { NULL } +}; + +static const u32 interrupt_msr_mask[16] = { + [BOOKE_INTERRUPT_CRITICAL] = MSR_ME, + [BOOKE_INTERRUPT_MACHINE_CHECK] = 0, + [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME, + [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_DEBUG] = MSR_ME, +}; + +const unsigned char exception_priority[] = { + [BOOKE_INTERRUPT_DATA_STORAGE] = 0, + [BOOKE_INTERRUPT_INST_STORAGE] = 1, + [BOOKE_INTERRUPT_ALIGNMENT] = 2, + [BOOKE_INTERRUPT_PROGRAM] = 3, + [BOOKE_INTERRUPT_FP_UNAVAIL] = 4, + [BOOKE_INTERRUPT_SYSCALL] = 5, + [BOOKE_INTERRUPT_AP_UNAVAIL] = 6, + [BOOKE_INTERRUPT_DTLB_MISS] = 7, + [BOOKE_INTERRUPT_ITLB_MISS] = 8, + [BOOKE_INTERRUPT_MACHINE_CHECK] = 9, + [BOOKE_INTERRUPT_DEBUG] = 10, + [BOOKE_INTERRUPT_CRITICAL] = 11, + [BOOKE_INTERRUPT_WATCHDOG] = 12, + [BOOKE_INTERRUPT_EXTERNAL] = 13, + [BOOKE_INTERRUPT_FIT] = 14, + [BOOKE_INTERRUPT_DECREMENTER] = 15, +}; + +const unsigned char priority_exception[] = { + BOOKE_INTERRUPT_DATA_STORAGE, + BOOKE_INTERRUPT_INST_STORAGE, + BOOKE_INTERRUPT_ALIGNMENT, + BOOKE_INTERRUPT_PROGRAM, + BOOKE_INTERRUPT_FP_UNAVAIL, + BOOKE_INTERRUPT_SYSCALL, + BOOKE_INTERRUPT_AP_UNAVAIL, + BOOKE_INTERRUPT_DTLB_MISS, + BOOKE_INTERRUPT_ITLB_MISS, + BOOKE_INTERRUPT_MACHINE_CHECK, + BOOKE_INTERRUPT_DEBUG, + BOOKE_INTERRUPT_CRITICAL, + BOOKE_INTERRUPT_WATCHDOG, + BOOKE_INTERRUPT_EXTERNAL, + BOOKE_INTERRUPT_FIT, + BOOKE_INTERRUPT_DECREMENTER, +}; + + +void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) +{ + struct tlbe *tlbe; + int i; + + printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); + printk("| %2s | %3s | %8s | %8s | %8s |\n", + "nr", "tid", "word0", "word1", "word2"); + + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + tlbe = &vcpu->arch.guest_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" G%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } + + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + tlbe = &vcpu->arch.shadow_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" S%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } +} + +/* TODO: use vcpu_printf() */ +void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) +{ + int i; + + printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); + printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); + printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); + + printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); + + for (i = 0; i < 32; i += 4) { + printk("gpr%02d: %08x %08x %08x %08x\n", i, + vcpu->arch.gpr[i], + vcpu->arch.gpr[i+1], + vcpu->arch.gpr[i+2], + vcpu->arch.gpr[i+3]); + } +} + +/* Check if we are ready to deliver the interrupt */ +static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) +{ + int r; + + switch (interrupt) { + case BOOKE_INTERRUPT_CRITICAL: + r = vcpu->arch.msr & MSR_CE; + break; + case BOOKE_INTERRUPT_MACHINE_CHECK: + r = vcpu->arch.msr & MSR_ME; + break; + case BOOKE_INTERRUPT_EXTERNAL: + r = vcpu->arch.msr & MSR_EE; + break; + case BOOKE_INTERRUPT_DECREMENTER: + r = vcpu->arch.msr & MSR_EE; + break; + case BOOKE_INTERRUPT_FIT: + r = vcpu->arch.msr & MSR_EE; + break; + case BOOKE_INTERRUPT_WATCHDOG: + r = vcpu->arch.msr & MSR_CE; + break; + case BOOKE_INTERRUPT_DEBUG: + r = vcpu->arch.msr & MSR_DE; + break; + default: + r = 1; + } + + return r; +} + +static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) +{ + switch (interrupt) { + case BOOKE_INTERRUPT_DECREMENTER: + vcpu->arch.tsr |= TSR_DIS; + break; + } + + vcpu->arch.srr0 = vcpu->arch.pc; + vcpu->arch.srr1 = vcpu->arch.msr; + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt]; + kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]); +} + +/* Check pending exceptions and deliver one, if possible. */ +void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) +{ + unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned int exception; + unsigned int priority; + + priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); + while (priority <= BOOKE_MAX_INTERRUPT) { + exception = priority_exception[priority]; + if (kvmppc_can_deliver_interrupt(vcpu, exception)) { + kvmppc_clear_exception(vcpu, exception); + kvmppc_deliver_interrupt(vcpu, exception); + break; + } + + priority = find_next_bit(pending, + BITS_PER_BYTE * sizeof(*pending), + priority + 1); + } +} + +static int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + int r; + + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + /* Future optimization: only reload non-volatiles if they were + * actually modified. */ + r = RESUME_GUEST_NV; + break; + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + /* We must reload nonvolatiles because "update" load/store + * instructions modify register state. */ + /* Future optimization: only reload non-volatiles if they were + * actually modified. */ + r = RESUME_HOST_NV; + break; + case EMULATE_FAIL: + /* XXX Deliver Program interrupt to guest. */ + printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, + vcpu->arch.last_inst); + r = RESUME_HOST; + break; + default: + BUG(); + } + + return r; +} + +/** + * kvmppc_handle_exit + * + * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) + */ +int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int exit_nr) +{ + enum emulation_result er; + int r = RESUME_HOST; + + local_irq_enable(); + + run->exit_reason = KVM_EXIT_UNKNOWN; + run->ready_for_interrupt_injection = 1; + + switch (exit_nr) { + case BOOKE_INTERRUPT_MACHINE_CHECK: + printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); + kvmppc_dump_vcpu(vcpu); + r = RESUME_HOST; + break; + + case BOOKE_INTERRUPT_EXTERNAL: + case BOOKE_INTERRUPT_DECREMENTER: + /* Since we switched IVPR back to the host's value, the host + * handled this interrupt the moment we enabled interrupts. + * Now we just offer it a chance to reschedule the guest. */ + + /* XXX At this point the TLB still holds our shadow TLB, so if + * we do reschedule the host will fault over it. Perhaps we + * should politely restore the host's entries to minimize + * misses before ceding control. */ + if (need_resched()) + cond_resched(); + if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) + vcpu->stat.dec_exits++; + else + vcpu->stat.ext_intr_exits++; + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_PROGRAM: + if (vcpu->arch.msr & MSR_PR) { + /* Program traps generated by user-level software must be handled + * by the guest kernel. */ + vcpu->arch.esr = vcpu->arch.fault_esr; + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); + r = RESUME_GUEST; + break; + } + + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + /* Future optimization: only reload non-volatiles if + * they were actually modified by emulation. */ + vcpu->stat.emulated_inst_exits++; + r = RESUME_GUEST_NV; + break; + case EMULATE_DO_DCR: + run->exit_reason = KVM_EXIT_DCR; + r = RESUME_HOST; + break; + case EMULATE_FAIL: + /* XXX Deliver Program interrupt to guest. */ + printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", + __func__, vcpu->arch.pc, vcpu->arch.last_inst); + /* For debugging, encode the failing instruction and + * report it to userspace. */ + run->hw.hardware_exit_reason = ~0ULL << 32; + run->hw.hardware_exit_reason |= vcpu->arch.last_inst; + r = RESUME_HOST; + break; + default: + BUG(); + } + break; + + case BOOKE_INTERRUPT_DATA_STORAGE: + vcpu->arch.dear = vcpu->arch.fault_dear; + vcpu->arch.esr = vcpu->arch.fault_esr; + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.dsi_exits++; + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_INST_STORAGE: + vcpu->arch.esr = vcpu->arch.fault_esr; + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.isi_exits++; + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_SYSCALL: + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.syscall_exits++; + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_DTLB_MISS: { + struct tlbe *gtlbe; + unsigned long eaddr = vcpu->arch.fault_dear; + gfn_t gfn; + + /* Check the guest TLB. */ + gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); + if (!gtlbe) { + /* The guest didn't have a mapping for it. */ + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->arch.dear = vcpu->arch.fault_dear; + vcpu->arch.esr = vcpu->arch.fault_esr; + vcpu->stat.dtlb_real_miss_exits++; + r = RESUME_GUEST; + break; + } + + vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); + gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; + + if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { + /* The guest TLB had a mapping, but the shadow TLB + * didn't, and it is RAM. This could be because: + * a) the entry is mapping the host kernel, or + * b) the guest used a large mapping which we're faking + * Either way, we need to satisfy the fault without + * invoking the guest. */ + kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, + gtlbe->word2); + vcpu->stat.dtlb_virt_miss_exits++; + r = RESUME_GUEST; + } else { + /* Guest has mapped and accessed a page which is not + * actually RAM. */ + r = kvmppc_emulate_mmio(run, vcpu); + } + + break; + } + + case BOOKE_INTERRUPT_ITLB_MISS: { + struct tlbe *gtlbe; + unsigned long eaddr = vcpu->arch.pc; + gfn_t gfn; + + r = RESUME_GUEST; + + /* Check the guest TLB. */ + gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); + if (!gtlbe) { + /* The guest didn't have a mapping for it. */ + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.itlb_real_miss_exits++; + break; + } + + vcpu->stat.itlb_virt_miss_exits++; + + gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; + + if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { + /* The guest TLB had a mapping, but the shadow TLB + * didn't. This could be because: + * a) the entry is mapping the host kernel, or + * b) the guest used a large mapping which we're faking + * Either way, we need to satisfy the fault without + * invoking the guest. */ + kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, + gtlbe->word2); + } else { + /* Guest mapped and leaped at non-RAM! */ + kvmppc_queue_exception(vcpu, + BOOKE_INTERRUPT_MACHINE_CHECK); + } + + break; + } + + default: + printk(KERN_EMERG "exit_nr %d\n", exit_nr); + BUG(); + } + + local_irq_disable(); + + kvmppc_check_and_deliver_interrupts(vcpu); + + /* Do some exit accounting. */ + vcpu->stat.sum_exits++; + if (!(r & RESUME_HOST)) { + /* To avoid clobbering exit_reason, only check for signals if + * we aren't already exiting to userspace for some other + * reason. */ + if (signal_pending(current)) { + run->exit_reason = KVM_EXIT_INTR; + r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); + + vcpu->stat.signal_exits++; + } else { + vcpu->stat.light_exits++; + } + } else { + switch (run->exit_reason) { + case KVM_EXIT_MMIO: + vcpu->stat.mmio_exits++; + break; + case KVM_EXIT_DCR: + vcpu->stat.dcr_exits++; + break; + case KVM_EXIT_INTR: + vcpu->stat.signal_exits++; + break; + } + } + + return r; +} + +/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct tlbe *tlbe = &vcpu->arch.guest_tlb[0]; + + tlbe->tid = 0; + tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; + tlbe->word1 = 0; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; + + tlbe++; + tlbe->tid = 0; + tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; + tlbe->word1 = 0xef600000; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR + | PPC44x_TLB_I | PPC44x_TLB_G; + + vcpu->arch.pc = 0; + vcpu->arch.msr = 0; + vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ + + /* Eye-catching number so we know if the guest takes an interrupt + * before it's programmed its own IVPR. */ + vcpu->arch.ivpr = 0x55550000; + + /* Since the guest can directly access the timebase, it must know the + * real timebase frequency. Accordingly, it must see the state of + * CCR1[TCS]. */ + vcpu->arch.ccr1 = mfspr(SPRN_CCR1); + + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + regs->pc = vcpu->arch.pc; + regs->cr = vcpu->arch.cr; + regs->ctr = vcpu->arch.ctr; + regs->lr = vcpu->arch.lr; + regs->xer = vcpu->arch.xer; + regs->msr = vcpu->arch.msr; + regs->srr0 = vcpu->arch.srr0; + regs->srr1 = vcpu->arch.srr1; + regs->pid = vcpu->arch.pid; + regs->sprg0 = vcpu->arch.sprg0; + regs->sprg1 = vcpu->arch.sprg1; + regs->sprg2 = vcpu->arch.sprg2; + regs->sprg3 = vcpu->arch.sprg3; + regs->sprg5 = vcpu->arch.sprg4; + regs->sprg6 = vcpu->arch.sprg5; + regs->sprg7 = vcpu->arch.sprg6; + + for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) + regs->gpr[i] = vcpu->arch.gpr[i]; + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + vcpu->arch.pc = regs->pc; + vcpu->arch.cr = regs->cr; + vcpu->arch.ctr = regs->ctr; + vcpu->arch.lr = regs->lr; + vcpu->arch.xer = regs->xer; + vcpu->arch.msr = regs->msr; + vcpu->arch.srr0 = regs->srr0; + vcpu->arch.srr1 = regs->srr1; + vcpu->arch.sprg0 = regs->sprg0; + vcpu->arch.sprg1 = regs->sprg1; + vcpu->arch.sprg2 = regs->sprg2; + vcpu->arch.sprg3 = regs->sprg3; + vcpu->arch.sprg5 = regs->sprg4; + vcpu->arch.sprg6 = regs->sprg5; + vcpu->arch.sprg7 = regs->sprg6; + + for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) + vcpu->arch.gpr[i] = regs->gpr[i]; + + return 0; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -ENOTSUPP; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -ENOTSUPP; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOTSUPP; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOTSUPP; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + struct tlbe *gtlbe; + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); + if (index == -1) { + tr->valid = 0; + return 0; + } + + gtlbe = &vcpu->arch.guest_tlb[index]; + + tr->physical_address = tlb_xlate(gtlbe, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; + + return 0; +} diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c new file mode 100644 index 00000000000..b480341bc31 --- /dev/null +++ b/arch/powerpc/kvm/booke_host.c @@ -0,0 +1,83 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/errno.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <asm/cacheflush.h> +#include <asm/kvm_ppc.h> + +unsigned long kvmppc_booke_handlers; + +static int kvmppc_booke_init(void) +{ + unsigned long ivor[16]; + unsigned long max_ivor = 0; + int i; + + /* We install our own exception handlers by hijacking IVPR. IVPR must + * be 16-bit aligned, so we need a 64KB allocation. */ + kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, + VCPU_SIZE_ORDER); + if (!kvmppc_booke_handlers) + return -ENOMEM; + + /* XXX make sure our handlers are smaller than Linux's */ + + /* Copy our interrupt handlers to match host IVORs. That way we don't + * have to swap the IVORs on every guest/host transition. */ + ivor[0] = mfspr(SPRN_IVOR0); + ivor[1] = mfspr(SPRN_IVOR1); + ivor[2] = mfspr(SPRN_IVOR2); + ivor[3] = mfspr(SPRN_IVOR3); + ivor[4] = mfspr(SPRN_IVOR4); + ivor[5] = mfspr(SPRN_IVOR5); + ivor[6] = mfspr(SPRN_IVOR6); + ivor[7] = mfspr(SPRN_IVOR7); + ivor[8] = mfspr(SPRN_IVOR8); + ivor[9] = mfspr(SPRN_IVOR9); + ivor[10] = mfspr(SPRN_IVOR10); + ivor[11] = mfspr(SPRN_IVOR11); + ivor[12] = mfspr(SPRN_IVOR12); + ivor[13] = mfspr(SPRN_IVOR13); + ivor[14] = mfspr(SPRN_IVOR14); + ivor[15] = mfspr(SPRN_IVOR15); + + for (i = 0; i < 16; i++) { + if (ivor[i] > max_ivor) + max_ivor = ivor[i]; + + memcpy((void *)kvmppc_booke_handlers + ivor[i], + kvmppc_handlers_start + i * kvmppc_handler_len, + kvmppc_handler_len); + } + flush_icache_range(kvmppc_booke_handlers, + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + + return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); +} + +static void __exit kvmppc_booke_exit(void) +{ + free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); + kvm_exit(); +} + +module_init(kvmppc_booke_init) +module_exit(kvmppc_booke_exit) diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S new file mode 100644 index 00000000000..3b653b5309b --- /dev/null +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -0,0 +1,436 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <asm/ppc_asm.h> +#include <asm/kvm_asm.h> +#include <asm/reg.h> +#include <asm/mmu-44x.h> +#include <asm/page.h> +#include <asm/asm-offsets.h> + +#define KVMPPC_MSR_MASK (MSR_CE|MSR_EE|MSR_PR|MSR_DE|MSR_ME|MSR_IS|MSR_DS) + +#define VCPU_GPR(n) (VCPU_GPRS + (n * 4)) + +/* The host stack layout: */ +#define HOST_R1 0 /* Implied by stwu. */ +#define HOST_CALLEE_LR 4 +#define HOST_RUN 8 +/* r2 is special: it holds 'current', and it made nonvolatile in the + * kernel with the -ffixed-r2 gcc option. */ +#define HOST_R2 12 +#define HOST_NV_GPRS 16 +#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4)) +#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4) +#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */ +#define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */ + +#define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \ + (1<<BOOKE_INTERRUPT_DTLB_MISS)) + +#define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ + (1<<BOOKE_INTERRUPT_DTLB_MISS)) + +#define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ + (1<<BOOKE_INTERRUPT_INST_STORAGE) | \ + (1<<BOOKE_INTERRUPT_PROGRAM) | \ + (1<<BOOKE_INTERRUPT_DTLB_MISS)) + +.macro KVM_HANDLER ivor_nr +_GLOBAL(kvmppc_handler_\ivor_nr) + /* Get pointer to vcpu and record exit number. */ + mtspr SPRN_SPRG0, r4 + mfspr r4, SPRN_SPRG1 + stw r5, VCPU_GPR(r5)(r4) + stw r6, VCPU_GPR(r6)(r4) + mfctr r5 + lis r6, kvmppc_resume_host@h + stw r5, VCPU_CTR(r4) + li r5, \ivor_nr + ori r6, r6, kvmppc_resume_host@l + mtctr r6 + bctr +.endm + +_GLOBAL(kvmppc_handlers_start) +KVM_HANDLER BOOKE_INTERRUPT_CRITICAL +KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK +KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE +KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE +KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL +KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT +KVM_HANDLER BOOKE_INTERRUPT_PROGRAM +KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL +KVM_HANDLER BOOKE_INTERRUPT_SYSCALL +KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL +KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER +KVM_HANDLER BOOKE_INTERRUPT_FIT +KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG +KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS +KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS +KVM_HANDLER BOOKE_INTERRUPT_DEBUG + +_GLOBAL(kvmppc_handler_len) + .long kvmppc_handler_1 - kvmppc_handler_0 + + +/* Registers: + * SPRG0: guest r4 + * r4: vcpu pointer + * r5: KVM exit number + */ +_GLOBAL(kvmppc_resume_host) + stw r3, VCPU_GPR(r3)(r4) + mfcr r3 + stw r3, VCPU_CR(r4) + stw r7, VCPU_GPR(r7)(r4) + stw r8, VCPU_GPR(r8)(r4) + stw r9, VCPU_GPR(r9)(r4) + + li r6, 1 + slw r6, r6, r5 + + /* Save the faulting instruction and all GPRs for emulation. */ + andi. r7, r6, NEED_INST_MASK + beq ..skip_inst_copy + mfspr r9, SPRN_SRR0 + mfmsr r8 + ori r7, r8, MSR_DS + mtmsr r7 + isync + lwz r9, 0(r9) + mtmsr r8 + isync + stw r9, VCPU_LAST_INST(r4) + + stw r15, VCPU_GPR(r15)(r4) + stw r16, VCPU_GPR(r16)(r4) + stw r17, VCPU_GPR(r17)(r4) + stw r18, VCPU_GPR(r18)(r4) + stw r19, VCPU_GPR(r19)(r4) + stw r20, VCPU_GPR(r20)(r4) + stw r21, VCPU_GPR(r21)(r4) + stw r22, VCPU_GPR(r22)(r4) + stw r23, VCPU_GPR(r23)(r4) + stw r24, VCPU_GPR(r24)(r4) + stw r25, VCPU_GPR(r25)(r4) + stw r26, VCPU_GPR(r26)(r4) + stw r27, VCPU_GPR(r27)(r4) + stw r28, VCPU_GPR(r28)(r4) + stw r29, VCPU_GPR(r29)(r4) + stw r30, VCPU_GPR(r30)(r4) + stw r31, VCPU_GPR(r31)(r4) +..skip_inst_copy: + + /* Also grab DEAR and ESR before the host can clobber them. */ + + andi. r7, r6, NEED_DEAR_MASK + beq ..skip_dear + mfspr r9, SPRN_DEAR + stw r9, VCPU_FAULT_DEAR(r4) +..skip_dear: + + andi. r7, r6, NEED_ESR_MASK + beq ..skip_esr + mfspr r9, SPRN_ESR + stw r9, VCPU_FAULT_ESR(r4) +..skip_esr: + + /* Save remaining volatile guest register state to vcpu. */ + stw r0, VCPU_GPR(r0)(r4) + stw r1, VCPU_GPR(r1)(r4) + stw r2, VCPU_GPR(r2)(r4) + stw r10, VCPU_GPR(r10)(r4) + stw r11, VCPU_GPR(r11)(r4) + stw r12, VCPU_GPR(r12)(r4) + stw r13, VCPU_GPR(r13)(r4) + stw r14, VCPU_GPR(r14)(r4) /* We need a NV GPR below. */ + mflr r3 + stw r3, VCPU_LR(r4) + mfxer r3 + stw r3, VCPU_XER(r4) + mfspr r3, SPRN_SPRG0 + stw r3, VCPU_GPR(r4)(r4) + mfspr r3, SPRN_SRR0 + stw r3, VCPU_PC(r4) + + /* Restore host stack pointer and PID before IVPR, since the host + * exception handlers use them. */ + lwz r1, VCPU_HOST_STACK(r4) + lwz r3, VCPU_HOST_PID(r4) + mtspr SPRN_PID, r3 + + /* Restore host IVPR before re-enabling interrupts. We cheat and know + * that Linux IVPR is always 0xc0000000. */ + lis r3, 0xc000 + mtspr SPRN_IVPR, r3 + + /* Switch to kernel stack and jump to handler. */ + LOAD_REG_ADDR(r3, kvmppc_handle_exit) + mtctr r3 + lwz r3, HOST_RUN(r1) + lwz r2, HOST_R2(r1) + mr r14, r4 /* Save vcpu pointer. */ + + bctrl /* kvmppc_handle_exit() */ + + /* Restore vcpu pointer and the nonvolatiles we used. */ + mr r4, r14 + lwz r14, VCPU_GPR(r14)(r4) + + /* Sometimes instruction emulation must restore complete GPR state. */ + andi. r5, r3, RESUME_FLAG_NV + beq ..skip_nv_load + lwz r15, VCPU_GPR(r15)(r4) + lwz r16, VCPU_GPR(r16)(r4) + lwz r17, VCPU_GPR(r17)(r4) + lwz r18, VCPU_GPR(r18)(r4) + lwz r19, VCPU_GPR(r19)(r4) + lwz r20, VCPU_GPR(r20)(r4) + lwz r21, VCPU_GPR(r21)(r4) + lwz r22, VCPU_GPR(r22)(r4) + lwz r23, VCPU_GPR(r23)(r4) + lwz r24, VCPU_GPR(r24)(r4) + lwz r25, VCPU_GPR(r25)(r4) + lwz r26, VCPU_GPR(r26)(r4) + lwz r27, VCPU_GPR(r27)(r4) + lwz r28, VCPU_GPR(r28)(r4) + lwz r29, VCPU_GPR(r29)(r4) + lwz r30, VCPU_GPR(r30)(r4) + lwz r31, VCPU_GPR(r31)(r4) +..skip_nv_load: + + /* Should we return to the guest? */ + andi. r5, r3, RESUME_FLAG_HOST + beq lightweight_exit + + srawi r3, r3, 2 /* Shift -ERR back down. */ + +heavyweight_exit: + /* Not returning to guest. */ + + /* We already saved guest volatile register state; now save the + * non-volatiles. */ + stw r15, VCPU_GPR(r15)(r4) + stw r16, VCPU_GPR(r16)(r4) + stw r17, VCPU_GPR(r17)(r4) + stw r18, VCPU_GPR(r18)(r4) + stw r19, VCPU_GPR(r19)(r4) + stw r20, VCPU_GPR(r20)(r4) + stw r21, VCPU_GPR(r21)(r4) + stw r22, VCPU_GPR(r22)(r4) + stw r23, VCPU_GPR(r23)(r4) + stw r24, VCPU_GPR(r24)(r4) + stw r25, VCPU_GPR(r25)(r4) + stw r26, VCPU_GPR(r26)(r4) + stw r27, VCPU_GPR(r27)(r4) + stw r28, VCPU_GPR(r28)(r4) + stw r29, VCPU_GPR(r29)(r4) + stw r30, VCPU_GPR(r30)(r4) + stw r31, VCPU_GPR(r31)(r4) + + /* Load host non-volatile register state from host stack. */ + lwz r14, HOST_NV_GPR(r14)(r1) + lwz r15, HOST_NV_GPR(r15)(r1) + lwz r16, HOST_NV_GPR(r16)(r1) + lwz r17, HOST_NV_GPR(r17)(r1) + lwz r18, HOST_NV_GPR(r18)(r1) + lwz r19, HOST_NV_GPR(r19)(r1) + lwz r20, HOST_NV_GPR(r20)(r1) + lwz r21, HOST_NV_GPR(r21)(r1) + lwz r22, HOST_NV_GPR(r22)(r1) + lwz r23, HOST_NV_GPR(r23)(r1) + lwz r24, HOST_NV_GPR(r24)(r1) + lwz r25, HOST_NV_GPR(r25)(r1) + lwz r26, HOST_NV_GPR(r26)(r1) + lwz r27, HOST_NV_GPR(r27)(r1) + lwz r28, HOST_NV_GPR(r28)(r1) + lwz r29, HOST_NV_GPR(r29)(r1) + lwz r30, HOST_NV_GPR(r30)(r1) + lwz r31, HOST_NV_GPR(r31)(r1) + + /* Return to kvm_vcpu_run(). */ + lwz r4, HOST_STACK_LR(r1) + addi r1, r1, HOST_STACK_SIZE + mtlr r4 + /* r3 still contains the return code from kvmppc_handle_exit(). */ + blr + + +/* Registers: + * r3: kvm_run pointer + * r4: vcpu pointer + */ +_GLOBAL(__kvmppc_vcpu_run) + stwu r1, -HOST_STACK_SIZE(r1) + stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + + /* Save host state to stack. */ + stw r3, HOST_RUN(r1) + mflr r3 + stw r3, HOST_STACK_LR(r1) + + /* Save host non-volatile register state to stack. */ + stw r14, HOST_NV_GPR(r14)(r1) + stw r15, HOST_NV_GPR(r15)(r1) + stw r16, HOST_NV_GPR(r16)(r1) + stw r17, HOST_NV_GPR(r17)(r1) + stw r18, HOST_NV_GPR(r18)(r1) + stw r19, HOST_NV_GPR(r19)(r1) + stw r20, HOST_NV_GPR(r20)(r1) + stw r21, HOST_NV_GPR(r21)(r1) + stw r22, HOST_NV_GPR(r22)(r1) + stw r23, HOST_NV_GPR(r23)(r1) + stw r24, HOST_NV_GPR(r24)(r1) + stw r25, HOST_NV_GPR(r25)(r1) + stw r26, HOST_NV_GPR(r26)(r1) + stw r27, HOST_NV_GPR(r27)(r1) + stw r28, HOST_NV_GPR(r28)(r1) + stw r29, HOST_NV_GPR(r29)(r1) + stw r30, HOST_NV_GPR(r30)(r1) + stw r31, HOST_NV_GPR(r31)(r1) + + /* Load guest non-volatiles. */ + lwz r14, VCPU_GPR(r14)(r4) + lwz r15, VCPU_GPR(r15)(r4) + lwz r16, VCPU_GPR(r16)(r4) + lwz r17, VCPU_GPR(r17)(r4) + lwz r18, VCPU_GPR(r18)(r4) + lwz r19, VCPU_GPR(r19)(r4) + lwz r20, VCPU_GPR(r20)(r4) + lwz r21, VCPU_GPR(r21)(r4) + lwz r22, VCPU_GPR(r22)(r4) + lwz r23, VCPU_GPR(r23)(r4) + lwz r24, VCPU_GPR(r24)(r4) + lwz r25, VCPU_GPR(r25)(r4) + lwz r26, VCPU_GPR(r26)(r4) + lwz r27, VCPU_GPR(r27)(r4) + lwz r28, VCPU_GPR(r28)(r4) + lwz r29, VCPU_GPR(r29)(r4) + lwz r30, VCPU_GPR(r30)(r4) + lwz r31, VCPU_GPR(r31)(r4) + +lightweight_exit: + stw r2, HOST_R2(r1) + + mfspr r3, SPRN_PID + stw r3, VCPU_HOST_PID(r4) + lwz r3, VCPU_PID(r4) + mtspr SPRN_PID, r3 + + /* Prevent all TLB updates. */ + mfmsr r5 + lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h + ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l + andc r6, r5, r6 + mtmsr r6 + + /* Save the host's non-pinned TLB mappings, and load the guest mappings + * over them. Leave the host's "pinned" kernel mappings in place. */ + /* XXX optimization: use generation count to avoid swapping unmodified + * entries. */ + mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ + lis r8, tlb_44x_hwater@ha + lwz r8, tlb_44x_hwater@l(r8) + addi r3, r4, VCPU_HOST_TLB - 4 + addi r9, r4, VCPU_SHADOW_TLB - 4 + li r6, 0 +1: + /* Save host entry. */ + tlbre r7, r6, PPC44x_TLB_PAGEID + mfspr r5, SPRN_MMUCR + stwu r5, 4(r3) + stwu r7, 4(r3) + tlbre r7, r6, PPC44x_TLB_XLAT + stwu r7, 4(r3) + tlbre r7, r6, PPC44x_TLB_ATTRIB + stwu r7, 4(r3) + /* Load guest entry. */ + lwzu r7, 4(r9) + mtspr SPRN_MMUCR, r7 + lwzu r7, 4(r9) + tlbwe r7, r6, PPC44x_TLB_PAGEID + lwzu r7, 4(r9) + tlbwe r7, r6, PPC44x_TLB_XLAT + lwzu r7, 4(r9) + tlbwe r7, r6, PPC44x_TLB_ATTRIB + /* Increment index. */ + addi r6, r6, 1 + cmpw r6, r8 + blt 1b + mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */ + + iccci 0, 0 /* XXX hack */ + + /* Load some guest volatiles. */ + lwz r0, VCPU_GPR(r0)(r4) + lwz r2, VCPU_GPR(r2)(r4) + lwz r9, VCPU_GPR(r9)(r4) + lwz r10, VCPU_GPR(r10)(r4) + lwz r11, VCPU_GPR(r11)(r4) + lwz r12, VCPU_GPR(r12)(r4) + lwz r13, VCPU_GPR(r13)(r4) + lwz r3, VCPU_LR(r4) + mtlr r3 + lwz r3, VCPU_XER(r4) + mtxer r3 + + /* Switch the IVPR. XXX If we take a TLB miss after this we're screwed, + * so how do we make sure vcpu won't fault? */ + lis r8, kvmppc_booke_handlers@ha + lwz r8, kvmppc_booke_handlers@l(r8) + mtspr SPRN_IVPR, r8 + + /* Save vcpu pointer for the exception handlers. */ + mtspr SPRN_SPRG1, r4 + + /* Can't switch the stack pointer until after IVPR is switched, + * because host interrupt handlers would get confused. */ + lwz r1, VCPU_GPR(r1)(r4) + + /* XXX handle USPRG0 */ + /* Host interrupt handlers may have clobbered these guest-readable + * SPRGs, so we need to reload them here with the guest's values. */ + lwz r3, VCPU_SPRG4(r4) + mtspr SPRN_SPRG4, r3 + lwz r3, VCPU_SPRG5(r4) + mtspr SPRN_SPRG5, r3 + lwz r3, VCPU_SPRG6(r4) + mtspr SPRN_SPRG6, r3 + lwz r3, VCPU_SPRG7(r4) + mtspr SPRN_SPRG7, r3 + + /* Finish loading guest volatiles and jump to guest. */ + lwz r3, VCPU_CTR(r4) + mtctr r3 + lwz r3, VCPU_CR(r4) + mtcr r3 + lwz r5, VCPU_GPR(r5)(r4) + lwz r6, VCPU_GPR(r6)(r4) + lwz r7, VCPU_GPR(r7)(r4) + lwz r8, VCPU_GPR(r8)(r4) + lwz r3, VCPU_PC(r4) + mtsrr0 r3 + lwz r3, VCPU_MSR(r4) + oris r3, r3, KVMPPC_MSR_MASK@h + ori r3, r3, KVMPPC_MSR_MASK@l + mtsrr1 r3 + lwz r3, VCPU_GPR(r3)(r4) + lwz r4, VCPU_GPR(r4)(r4) + rfi diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c new file mode 100644 index 00000000000..a03fe0c8069 --- /dev/null +++ b/arch/powerpc/kvm/emulate.c @@ -0,0 +1,760 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/jiffies.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm_host.h> + +#include <asm/dcr.h> +#include <asm/dcr-regs.h> +#include <asm/time.h> +#include <asm/byteorder.h> +#include <asm/kvm_ppc.h> + +#include "44x_tlb.h" + +/* Instruction decoding */ +static inline unsigned int get_op(u32 inst) +{ + return inst >> 26; +} + +static inline unsigned int get_xop(u32 inst) +{ + return (inst >> 1) & 0x3ff; +} + +static inline unsigned int get_sprn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_dcrn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_rt(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_rs(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_ra(u32 inst) +{ + return (inst >> 16) & 0x1f; +} + +static inline unsigned int get_rb(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_rc(u32 inst) +{ + return inst & 0x1; +} + +static inline unsigned int get_ws(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_d(u32 inst) +{ + return inst & 0xffff; +} + +static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct tlbe *tlbe) +{ + gpa_t gpa; + + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) +{ + u64 eaddr; + u64 raddr; + u64 asid; + u32 flags; + struct tlbe *tlbe; + unsigned int ra; + unsigned int rs; + unsigned int ws; + unsigned int index; + + ra = get_ra(inst); + rs = get_rs(inst); + ws = get_ws(inst); + + index = vcpu->arch.gpr[ra]; + if (index > PPC44x_TLB_SIZE) { + printk("%s: index %d\n", __func__, index); + kvmppc_dump_vcpu(vcpu); + return EMULATE_FAIL; + } + + tlbe = &vcpu->arch.guest_tlb[index]; + + /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ + if (tlbe->word0 & PPC44x_TLB_VALID) { + eaddr = get_tlb_eaddr(tlbe); + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; + kvmppc_mmu_invalidate(vcpu, eaddr, asid); + } + + switch (ws) { + case PPC44x_TLB_PAGEID: + tlbe->tid = vcpu->arch.mmucr & 0xff; + tlbe->word0 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_XLAT: + tlbe->word1 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_ATTRIB: + tlbe->word2 = vcpu->arch.gpr[rs]; + break; + + default: + return EMULATE_FAIL; + } + + if (tlbe_is_host_safe(vcpu, tlbe)) { + eaddr = get_tlb_eaddr(tlbe); + raddr = get_tlb_raddr(tlbe); + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; + flags = tlbe->word2 & 0xffff; + + /* Create a 4KB mapping on the host. If the guest wanted a + * large page, only the first 4KB is mapped here and the rest + * are mapped on the fly. */ + kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); + } + + return EMULATE_DONE; +} + +static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.tcr & TCR_DIE) { + /* The decrementer ticks at the same rate as the timebase, so + * that's how we convert the guest DEC value to the number of + * host ticks. */ + unsigned long nr_jiffies; + + nr_jiffies = vcpu->arch.dec / tb_ticks_per_jiffy; + mod_timer(&vcpu->arch.dec_timer, + get_jiffies_64() + nr_jiffies); + } else { + del_timer(&vcpu->arch.dec_timer); + } +} + +static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.srr0; + kvmppc_set_msr(vcpu, vcpu->arch.srr1); +} + +/* XXX to do: + * lhax + * lhaux + * lswx + * lswi + * stswx + * stswi + * lha + * lhau + * lmw + * stmw + * + * XXX is_bigendian should depend on MMU mapping or MSR[LE] + */ +int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + u32 inst = vcpu->arch.last_inst; + u32 ea; + int ra; + int rb; + int rc; + int rs; + int rt; + int sprn; + int dcrn; + enum emulation_result emulated = EMULATE_DONE; + int advance = 1; + + switch (get_op(inst)) { + case 3: /* trap */ + printk("trap!\n"); + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); + advance = 0; + break; + + case 19: + switch (get_xop(inst)) { + case 50: /* rfi */ + kvmppc_emul_rfi(vcpu); + advance = 0; + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case 31: + switch (get_xop(inst)) { + + case 83: /* mfmsr */ + rt = get_rt(inst); + vcpu->arch.gpr[rt] = vcpu->arch.msr; + break; + + case 87: /* lbzx */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case 131: /* wrtee */ + rs = get_rs(inst); + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (vcpu->arch.gpr[rs] & MSR_EE); + break; + + case 146: /* mtmsr */ + rs = get_rs(inst); + kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); + break; + + case 163: /* wrteei */ + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (inst & MSR_EE); + break; + + case 215: /* stbx */ + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 1, 1); + break; + + case 247: /* stbux */ + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 1, 1); + vcpu->arch.gpr[rs] = ea; + break; + + case 279: /* lhzx */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case 311: /* lhzux */ + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + vcpu->arch.gpr[ra] = ea; + break; + + case 323: /* mfdcr */ + dcrn = get_dcrn(inst); + rt = get_rt(inst); + + /* The guest may access CPR0 registers to determine the timebase + * frequency, and it must know the real host frequency because it + * can directly access the timebase registers. + * + * It would be possible to emulate those accesses in userspace, + * but userspace can really only figure out the end frequency. + * We could decompose that into the factors that compute it, but + * that's tricky math, and it's easier to just report the real + * CPR0 values. + */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; + break; + case DCRN_CPR0_CONFIG_DATA: + local_irq_disable(); + mtdcr(DCRN_CPR0_CONFIG_ADDR, + vcpu->arch.cpr0_cfgaddr); + vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); + local_irq_enable(); + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = 0; + run->dcr.is_write = 0; + vcpu->arch.io_gpr = rt; + vcpu->arch.dcr_needed = 1; + emulated = EMULATE_DO_DCR; + } + + break; + + case 339: /* mfspr */ + sprn = get_sprn(inst); + rt = get_rt(inst); + + switch (sprn) { + case SPRN_SRR0: + vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; + case SPRN_SRR1: + vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; + case SPRN_MMUCR: + vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; + case SPRN_PID: + vcpu->arch.gpr[rt] = vcpu->arch.pid; break; + case SPRN_IVPR: + vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; + case SPRN_CCR0: + vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; + case SPRN_CCR1: + vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; + case SPRN_PVR: + vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; + case SPRN_DEAR: + vcpu->arch.gpr[rt] = vcpu->arch.dear; break; + case SPRN_ESR: + vcpu->arch.gpr[rt] = vcpu->arch.esr; break; + case SPRN_DBCR0: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; + case SPRN_DBCR1: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; + + /* Note: mftb and TBRL/TBWL are user-accessible, so + * the guest can always access the real TB anyways. + * In fact, we probably will never see these traps. */ + case SPRN_TBWL: + vcpu->arch.gpr[rt] = mftbl(); break; + case SPRN_TBWU: + vcpu->arch.gpr[rt] = mftbu(); break; + + case SPRN_SPRG0: + vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break; + case SPRN_SPRG1: + vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break; + case SPRN_SPRG2: + vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break; + case SPRN_SPRG3: + vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break; + /* Note: SPRG4-7 are user-readable, so we don't get + * a trap. */ + + case SPRN_IVOR0: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break; + case SPRN_IVOR1: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break; + case SPRN_IVOR2: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break; + case SPRN_IVOR3: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break; + case SPRN_IVOR4: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break; + case SPRN_IVOR5: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break; + case SPRN_IVOR6: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break; + case SPRN_IVOR7: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break; + case SPRN_IVOR8: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break; + case SPRN_IVOR9: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break; + case SPRN_IVOR10: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break; + case SPRN_IVOR11: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break; + case SPRN_IVOR12: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break; + case SPRN_IVOR13: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break; + case SPRN_IVOR14: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break; + case SPRN_IVOR15: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break; + + default: + printk("mfspr: unknown spr %x\n", sprn); + vcpu->arch.gpr[rt] = 0; + break; + } + break; + + case 407: /* sthx */ + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 2, 1); + break; + + case 439: /* sthux */ + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 2, 1); + vcpu->arch.gpr[ra] = ea; + break; + + case 451: /* mtdcr */ + dcrn = get_dcrn(inst); + rs = get_rs(inst); + + /* emulate some access in kernel */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = vcpu->arch.gpr[rs]; + run->dcr.is_write = 1; + vcpu->arch.dcr_needed = 1; + emulated = EMULATE_DO_DCR; + } + + break; + + case 467: /* mtspr */ + sprn = get_sprn(inst); + rs = get_rs(inst); + switch (sprn) { + case SPRN_SRR0: + vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; + case SPRN_SRR1: + vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; + case SPRN_MMUCR: + vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; + case SPRN_PID: + vcpu->arch.pid = vcpu->arch.gpr[rs]; break; + case SPRN_CCR0: + vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; + case SPRN_CCR1: + vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; + case SPRN_DEAR: + vcpu->arch.dear = vcpu->arch.gpr[rs]; break; + case SPRN_ESR: + vcpu->arch.esr = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR0: + vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR1: + vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; + + /* XXX We need to context-switch the timebase for + * watchdog and FIT. */ + case SPRN_TBWL: break; + case SPRN_TBWU: break; + + case SPRN_DEC: + vcpu->arch.dec = vcpu->arch.gpr[rs]; + kvmppc_emulate_dec(vcpu); + break; + + case SPRN_TSR: + vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; + + case SPRN_TCR: + vcpu->arch.tcr = vcpu->arch.gpr[rs]; + kvmppc_emulate_dec(vcpu); + break; + + case SPRN_SPRG0: + vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG1: + vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG2: + vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG3: + vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; + + /* Note: SPRG4-7 are user-readable. These values are + * loaded into the real SPRGs when resuming the + * guest. */ + case SPRN_SPRG4: + vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG5: + vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG6: + vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG7: + vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; + + case SPRN_IVPR: + vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR0: + vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR1: + vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR2: + vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR3: + vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR4: + vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR5: + vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR6: + vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR7: + vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR8: + vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR9: + vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR10: + vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR11: + vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR12: + vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR13: + vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR14: + vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR15: + vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break; + + default: + printk("mtspr: unknown spr %x\n", sprn); + emulated = EMULATE_FAIL; + break; + } + break; + + case 470: /* dcbi */ + /* Do nothing. The guest is performing dcbi because + * hardware DMA is not snooped by the dcache, but + * emulated DMA either goes through the dcache as + * normal writes, or the host kernel has handled dcache + * coherence. */ + break; + + case 534: /* lwbrx */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); + break; + + case 566: /* tlbsync */ + break; + + case 662: /* stwbrx */ + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 4, 0); + break; + + case 978: /* tlbwe */ + emulated = kvmppc_emul_tlbwe(vcpu, inst); + break; + + case 914: { /* tlbsx */ + int index; + unsigned int as = get_mmucr_sts(vcpu); + unsigned int pid = get_mmucr_stid(vcpu); + + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + rc = get_rc(inst); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); + if (rc) { + if (index < 0) + vcpu->arch.cr &= ~0x20000000; + else + vcpu->arch.cr |= 0x20000000; + } + vcpu->arch.gpr[rt] = index; + + } + break; + + case 790: /* lhbrx */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); + break; + + case 918: /* sthbrx */ + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 2, 0); + break; + + case 966: /* iccci */ + break; + + default: + printk("unknown: op %d xop %d\n", get_op(inst), + get_xop(inst)); + emulated = EMULATE_FAIL; + break; + } + break; + + case 32: /* lwz */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + case 33: /* lwzu */ + ra = get_ra(inst); + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + case 34: /* lbz */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case 35: /* lbzu */ + ra = get_ra(inst); + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + case 36: /* stw */ + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 4, 1); + break; + + case 37: /* stwu */ + ra = get_ra(inst); + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 4, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + case 38: /* stb */ + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 1, 1); + break; + + case 39: /* stbu */ + ra = get_ra(inst); + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 1, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + case 40: /* lhz */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case 41: /* lhzu */ + ra = get_ra(inst); + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + case 44: /* sth */ + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 2, 1); + break; + + case 45: /* sthu */ + ra = get_ra(inst); + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 2, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + default: + printk("unknown op %d\n", get_op(inst)); + emulated = EMULATE_FAIL; + break; + } + + if (advance) + vcpu->arch.pc += 4; /* Advance past emulated instruction. */ + + return emulated; +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c new file mode 100644 index 00000000000..bad40bd2d3a --- /dev/null +++ b/arch/powerpc/kvm/powerpc.c @@ -0,0 +1,436 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <asm/cputable.h> +#include <asm/uaccess.h> +#include <asm/kvm_ppc.h> + + +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +{ + return gfn; +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *v) +{ + /* XXX implement me */ + return 0; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) +{ + return 1; +} + + +int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + int r; + + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + /* Future optimization: only reload non-volatiles if they were + * actually modified. */ + r = RESUME_GUEST_NV; + break; + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + /* We must reload nonvolatiles because "update" load/store + * instructions modify register state. */ + /* Future optimization: only reload non-volatiles if they were + * actually modified. */ + r = RESUME_HOST_NV; + break; + case EMULATE_FAIL: + /* XXX Deliver Program interrupt to guest. */ + printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, + vcpu->arch.last_inst); + r = RESUME_HOST; + break; + default: + BUG(); + } + + return r; +} + +void kvm_arch_hardware_enable(void *garbage) +{ +} + +void kvm_arch_hardware_disable(void *garbage) +{ +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +void kvm_arch_check_processor_compat(void *rtn) +{ + int r; + + if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) + r = 0; + else + r = -ENOTSUPP; + + *(int *)rtn = r; +} + +struct kvm *kvm_arch_create_vm(void) +{ + struct kvm *kvm; + + kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); + if (!kvm) + return ERR_PTR(-ENOMEM); + + return kvm; +} + +static void kvmppc_free_vcpus(struct kvm *kvm) +{ + unsigned int i; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_vcpu_free(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kvmppc_free_vcpus(kvm); + kvm_free_physmem(kvm); + kfree(kvm); +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_USER_MEMORY: + r = 1; + break; + default: + r = 0; + break; + } + return r; + +} + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + return 0; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvm_vcpu *vcpu; + int err; + + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) { + err = -ENOMEM; + goto out; + } + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + return vcpu; + +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); +out: + return ERR_PTR(err); +} + +void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_arch_vcpu_free(vcpu); +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; + + return test_bit(priority, &vcpu->arch.pending_exceptions); +} + +static void kvmppc_decrementer_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); +} + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + setup_timer(&vcpu->arch.dec_timer, kvmppc_decrementer_func, + (unsigned long)vcpu); + + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + +void decache_vcpus_on_cpu(int cpu) +{ +} + +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) +{ + return -ENOTSUPP; +} + +static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + *gpr = run->dcr.data; +} + +static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + + if (run->mmio.len > sizeof(*gpr)) { + printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); + return; + } + + if (vcpu->arch.mmio_is_bigendian) { + switch (run->mmio.len) { + case 4: *gpr = *(u32 *)run->mmio.data; break; + case 2: *gpr = *(u16 *)run->mmio.data; break; + case 1: *gpr = *(u8 *)run->mmio.data; break; + } + } else { + /* Convert BE data from userland back to LE. */ + switch (run->mmio.len) { + case 4: *gpr = ld_le32((u32 *)run->mmio.data); break; + case 2: *gpr = ld_le16((u16 *)run->mmio.data); break; + case 1: *gpr = *(u8 *)run->mmio.data; break; + } + } +} + +int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, int is_bigendian) +{ + if (bytes > sizeof(run->mmio.data)) { + printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, + run->mmio.len); + } + + run->mmio.phys_addr = vcpu->arch.paddr_accessed; + run->mmio.len = bytes; + run->mmio.is_write = 0; + + vcpu->arch.io_gpr = rt; + vcpu->arch.mmio_is_bigendian = is_bigendian; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 0; + + return EMULATE_DO_MMIO; +} + +int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + u32 val, unsigned int bytes, int is_bigendian) +{ + void *data = run->mmio.data; + + if (bytes > sizeof(run->mmio.data)) { + printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, + run->mmio.len); + } + + run->mmio.phys_addr = vcpu->arch.paddr_accessed; + run->mmio.len = bytes; + run->mmio.is_write = 1; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 1; + + /* Store the value at the lowest bytes in 'data'. */ + if (is_bigendian) { + switch (bytes) { + case 4: *(u32 *)data = val; break; + case 2: *(u16 *)data = val; break; + case 1: *(u8 *)data = val; break; + } + } else { + /* Store LE value into 'data'. */ + switch (bytes) { + case 4: st_le32(data, val); break; + case 2: st_le16(data, val); break; + case 1: *(u8 *)data = val; break; + } + } + + return EMULATE_DO_MMIO; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int r; + sigset_t sigsaved; + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + if (vcpu->mmio_needed) { + if (!vcpu->mmio_is_write) + kvmppc_complete_mmio_load(vcpu, run); + vcpu->mmio_needed = 0; + } else if (vcpu->arch.dcr_needed) { + if (!vcpu->arch.dcr_is_write) + kvmppc_complete_dcr_load(vcpu, run); + vcpu->arch.dcr_needed = 0; + } + + kvmppc_check_and_deliver_interrupts(vcpu); + + local_irq_disable(); + kvm_guest_enter(); + r = __kvmppc_vcpu_run(run, vcpu); + kvm_guest_exit(); + local_irq_enable(); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + return r; +} + +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) +{ + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + long r; + + switch (ioctl) { + case KVM_INTERRUPT: { + struct kvm_interrupt irq; + r = -EFAULT; + if (copy_from_user(&irq, argp, sizeof(irq))) + goto out; + r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); + break; + } + default: + r = -EINVAL; + } + +out: + return r; +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +{ + return -ENOTSUPP; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + long r; + + switch (ioctl) { + default: + r = -EINVAL; + } + + return r; +} + +int kvm_arch_init(void *opaque) +{ + return 0; +} + +void kvm_arch_exit(void) +{ +} diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index ada249bf977..ce10e2b1b90 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -202,7 +202,7 @@ adjust_total_lowmem(void) cam_max_size = max_lowmem_size; /* adjust lowmem size to max_lowmem_size */ - ram = min(max_lowmem_size, total_lowmem); + ram = min(max_lowmem_size, (phys_addr_t)total_lowmem); /* Calculate CAM values */ __cam0 = 1UL << 2 * (__ilog2(ram) / 2); diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index e10d76a860d..ddeaf9e38ad 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -191,7 +191,7 @@ _GLOBAL(add_hash_page) add r3,r3,r0 /* note create_hpte trims to 24 bits */ #ifdef CONFIG_SMP - rlwinm r8,r1,0,0,18 /* use cpu number to make tag */ + rlwinm r8,r1,0,0,(31-THREAD_SHIFT) /* use cpu number to make tag */ lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */ oris r8,r8,12 #endif /* CONFIG_SMP */ @@ -526,7 +526,7 @@ _GLOBAL(flush_hash_pages) #ifdef CONFIG_SMP addis r9,r7,mmu_hash_lock@ha addi r9,r9,mmu_hash_lock@l - rlwinm r8,r1,0,0,18 + rlwinm r8,r1,0,0,(31-THREAD_SHIFT) add r8,r8,r7 lwz r8,TI_CPU(r8) oris r8,r8,9 diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 47325f23c51..1952b4d3fa7 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -59,7 +59,10 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); unsigned long total_memory; unsigned long total_lowmem; -phys_addr_t memstart_addr; +phys_addr_t memstart_addr = (phys_addr_t)~0ull; +EXPORT_SYMBOL(memstart_addr); +phys_addr_t kernstart_addr; +EXPORT_SYMBOL(kernstart_addr); phys_addr_t lowmem_end_addr; int boot_mapsize; @@ -68,14 +71,6 @@ unsigned long agp_special_page; EXPORT_SYMBOL(agp_special_page); #endif -#ifdef CONFIG_HIGHMEM -pte_t *kmap_pte; -pgprot_t kmap_prot; - -EXPORT_SYMBOL(kmap_prot); -EXPORT_SYMBOL(kmap_pte); -#endif - void MMU_init(void); /* XXX should be in current.h -- paulus */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 698bd000f98..c5ac532a016 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -72,7 +72,8 @@ #warning TASK_SIZE is smaller than it needs to be. #endif -phys_addr_t memstart_addr; +phys_addr_t memstart_addr = ~0; +phys_addr_t kernstart_addr; void free_initmem(void) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 16def4dcff6..d9e37f365b5 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -45,6 +45,7 @@ #include <asm/tlb.h> #include <asm/sections.h> #include <asm/vdso.h> +#include <asm/fixmap.h> #include "mmu_decl.h" @@ -57,6 +58,20 @@ int init_bootmem_done; int mem_init_done; unsigned long memory_limit; +#ifdef CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; + +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); + +static inline pte_t *virt_to_kpte(unsigned long vaddr) +{ + return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), + vaddr), vaddr), vaddr); +} +#endif + int page_is_ram(unsigned long pfn) { unsigned long paddr = (pfn << PAGE_SHIFT); @@ -95,15 +110,6 @@ EXPORT_SYMBOL(phys_mem_access_prot); #ifdef CONFIG_MEMORY_HOTPLUG -void online_page(struct page *page) -{ - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalram_pages++; - num_physpages++; -} - #ifdef CONFIG_NUMA int memory_add_physaddr_to_nid(u64 start) { @@ -216,7 +222,7 @@ void __init do_init_bootmem(void) unsigned long total_pages; int boot_mapsize; - max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; total_pages = (lmb_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; #ifdef CONFIG_HIGHMEM total_pages = total_lowmem >> PAGE_SHIFT; @@ -232,7 +238,8 @@ void __init do_init_bootmem(void) start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); - boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); + min_low_pfn = MEMORY_START >> PAGE_SHIFT; + boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); /* Add active regions with valid PFNs */ for (i = 0; i < lmb.memory.cnt; i++) { @@ -310,14 +317,19 @@ void __init paging_init(void) unsigned long top_of_ram = lmb_end_of_DRAM(); unsigned long max_zone_pfns[MAX_NR_ZONES]; +#ifdef CONFIG_PPC32 + unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1); + unsigned long end = __fix_to_virt(FIX_HOLE); + + for (; v < end; v += PAGE_SIZE) + map_page(v, 0, 0); /* XXX gross */ +#endif + #ifdef CONFIG_HIGHMEM map_page(PKMAP_BASE, 0, 0); /* XXX gross */ - pkmap_page_table = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k - (PKMAP_BASE), PKMAP_BASE), PKMAP_BASE), PKMAP_BASE); - map_page(KMAP_FIX_BEGIN, 0, 0); /* XXX gross */ - kmap_pte = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k - (KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), - KMAP_FIX_BEGIN); + pkmap_page_table = virt_to_kpte(PKMAP_BASE); + + kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); kmap_prot = PAGE_KERNEL; #endif /* CONFIG_HIGHMEM */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 1efd631211e..dc704da363e 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -18,6 +18,7 @@ #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/lmb.h> +#include <linux/of.h> #include <asm/sparsemem.h> #include <asm/prom.h> #include <asm/system.h> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 64c44bcc68d..80d1babb230 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -29,6 +29,7 @@ #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/fixmap.h> #include <asm/io.h> #include "mmu_decl.h" @@ -387,3 +388,25 @@ void kernel_map_pages(struct page *page, int numpages, int enable) change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); } #endif /* CONFIG_DEBUG_PAGEALLOC */ + +static int fixmaps; +unsigned long FIXADDR_TOP = 0xfffff000; +EXPORT_SYMBOL(FIXADDR_TOP); + +void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) +{ + unsigned long address = __fix_to_virt(idx); + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + + map_page(address, phys, flags); + fixmaps++; +} + +void __this_fixmap_does_not_exist(void) +{ + WARN_ON(1); +} diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index 18b8ebe930d..5e1e8cf14e7 100644 --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -3,11 +3,12 @@ * * Initial author: Xianghua Xiao <x.xiao@freescale.com> * Recode: Jason Jin <jason.jin@freescale.com> + * York Sun <yorksun@freescale.com> * * Rewrite the interrupt routing. remove the 8259PIC support, * All the integrated device in ULI use sideband interrupt. * - * Copyright 2007 Freescale Semiconductor Inc. + * Copyright 2008 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -38,6 +39,8 @@ #include <sysdev/fsl_pci.h> #include <sysdev/fsl_soc.h> +static unsigned char *pixis_bdcfg0, *pixis_arch; + static struct of_device_id __initdata mpc8610_ids[] = { { .compatible = "fsl,mpc8610-immr", }, {} @@ -52,8 +55,7 @@ static int __init mpc8610_declare_of_platform_devices(void) } machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices); -static void __init -mpc86xx_hpcd_init_irq(void) +static void __init mpc86xx_hpcd_init_irq(void) { struct mpic *mpic1; struct device_node *np; @@ -161,12 +163,159 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5288, final_uli5288); #endif /* CONFIG_PCI */ -static void __init -mpc86xx_hpcd_setup_arch(void) +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + +static u32 get_busfreq(void) { -#ifdef CONFIG_PCI - struct device_node *np; + struct device_node *node; + + u32 fs_busfreq = 0; + node = of_find_node_by_type(NULL, "cpu"); + if (node) { + unsigned int size; + const unsigned int *prop = + of_get_property(node, "bus-frequency", &size); + if (prop) + fs_busfreq = *prop; + of_node_put(node); + }; + return fs_busfreq; +} + +unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel, + int monitor_port) +{ + static const unsigned long pixelformat[][3] = { + {0x88882317, 0x88083218, 0x65052119}, + {0x88883316, 0x88082219, 0x65053118}, + }; + unsigned int pix_fmt, arch_monitor; + + arch_monitor = ((*pixis_arch == 0x01) && (monitor_port == 0))? 0 : 1; + /* DVI port for board version 0x01 */ + + if (bits_per_pixel == 32) + pix_fmt = pixelformat[arch_monitor][0]; + else if (bits_per_pixel == 24) + pix_fmt = pixelformat[arch_monitor][1]; + else if (bits_per_pixel == 16) + pix_fmt = pixelformat[arch_monitor][2]; + else + pix_fmt = pixelformat[1][0]; + + return pix_fmt; +} + +void mpc8610hpcd_set_gamma_table(int monitor_port, char *gamma_table_base) +{ + int i; + if (monitor_port == 2) { /* dual link LVDS */ + for (i = 0; i < 256*3; i++) + gamma_table_base[i] = (gamma_table_base[i] << 2) | + ((gamma_table_base[i] >> 6) & 0x03); + } +} + +void mpc8610hpcd_set_monitor_port(int monitor_port) +{ + static const u8 bdcfg[] = {0xBD, 0xB5, 0xA5}; + if (monitor_port < 3) + *pixis_bdcfg0 = bdcfg[monitor_port]; +} + +void mpc8610hpcd_set_pixel_clock(unsigned int pixclock) +{ + u32 __iomem *clkdvdr; + u32 temp; + /* variables for pixel clock calcs */ + ulong bestval, bestfreq, speed_ccb, minpixclock, maxpixclock; + ulong pixval; + long err; + int i; + + clkdvdr = ioremap(get_immrbase() + 0xe0800, sizeof(u32)); + if (!clkdvdr) { + printk(KERN_ERR "Err: can't map clock divider register!\n"); + return; + } + + /* Pixel Clock configuration */ + pr_debug("DIU: Bus Frequency = %d\n", get_busfreq()); + speed_ccb = get_busfreq(); + + /* Calculate the pixel clock with the smallest error */ + /* calculate the following in steps to avoid overflow */ + pr_debug("DIU pixclock in ps - %d\n", pixclock); + temp = 1000000000/pixclock; + temp *= 1000; + pixclock = temp; + pr_debug("DIU pixclock freq - %u\n", pixclock); + + temp = pixclock * 5 / 100; + pr_debug("deviation = %d\n", temp); + minpixclock = pixclock - temp; + maxpixclock = pixclock + temp; + pr_debug("DIU minpixclock - %lu\n", minpixclock); + pr_debug("DIU maxpixclock - %lu\n", maxpixclock); + pixval = speed_ccb/pixclock; + pr_debug("DIU pixval = %lu\n", pixval); + + err = 100000000; + bestval = pixval; + pr_debug("DIU bestval = %lu\n", bestval); + + bestfreq = 0; + for (i = -1; i <= 1; i++) { + temp = speed_ccb / ((pixval+i) + 1); + pr_debug("DIU test pixval i= %d, pixval=%lu, temp freq. = %u\n", + i, pixval, temp); + if ((temp < minpixclock) || (temp > maxpixclock)) + pr_debug("DIU exceeds monitor range (%lu to %lu)\n", + minpixclock, maxpixclock); + else if (abs(temp - pixclock) < err) { + pr_debug("Entered the else if block %d\n", i); + err = abs(temp - pixclock); + bestval = pixval+i; + bestfreq = temp; + } + } + + pr_debug("DIU chose = %lx\n", bestval); + pr_debug("DIU error = %ld\n NomPixClk ", err); + pr_debug("DIU: Best Freq = %lx\n", bestfreq); + /* Modify PXCLK in GUTS CLKDVDR */ + pr_debug("DIU: Current value of CLKDVDR = 0x%08x\n", (*clkdvdr)); + temp = (*clkdvdr) & 0x2000FFFF; + *clkdvdr = temp; /* turn off clock */ + *clkdvdr = temp | 0x80000000 | (((bestval) & 0x1F) << 16); + pr_debug("DIU: Modified value of CLKDVDR = 0x%08x\n", (*clkdvdr)); + iounmap(clkdvdr); +} + +ssize_t mpc8610hpcd_show_monitor_port(int monitor_port, char *buf) +{ + return snprintf(buf, PAGE_SIZE, + "%c0 - DVI\n" + "%c1 - Single link LVDS\n" + "%c2 - Dual link LVDS\n", + monitor_port == 0 ? '*' : ' ', + monitor_port == 1 ? '*' : ' ', + monitor_port == 2 ? '*' : ' '); +} + +int mpc8610hpcd_set_sysfs_monitor_port(int val) +{ + return val < 3 ? val : 0; +} + #endif + +static void __init mpc86xx_hpcd_setup_arch(void) +{ + struct resource r; + struct device_node *np; + unsigned char *pixis; + if (ppc_md.progress) ppc_md.progress("mpc86xx_hpcd_setup_arch()", 0); @@ -183,6 +332,30 @@ mpc86xx_hpcd_setup_arch(void) } } #endif +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + preallocate_diu_videomemory(); + diu_ops.get_pixel_format = mpc8610hpcd_get_pixel_format; + diu_ops.set_gamma_table = mpc8610hpcd_set_gamma_table; + diu_ops.set_monitor_port = mpc8610hpcd_set_monitor_port; + diu_ops.set_pixel_clock = mpc8610hpcd_set_pixel_clock; + diu_ops.show_monitor_port = mpc8610hpcd_show_monitor_port; + diu_ops.set_sysfs_monitor_port = mpc8610hpcd_set_sysfs_monitor_port; +#endif + + np = of_find_compatible_node(NULL, NULL, "fsl,fpga-pixis"); + if (np) { + of_address_to_resource(np, 0, &r); + of_node_put(np); + pixis = ioremap(r.start, 32); + if (!pixis) { + printk(KERN_ERR "Err: can't map FPGA cfg register!\n"); + return; + } + pixis_bdcfg0 = pixis + 8; + pixis_arch = pixis + 1; + } else + printk(KERN_ERR "Err: " + "can't find device node 'fsl,fpga-pixis'\n"); printk("MPC86xx HPCD board from Freescale Semiconductor\n"); } @@ -200,8 +373,7 @@ static int __init mpc86xx_hpcd_probe(void) return 0; } -static long __init -mpc86xx_time_init(void) +static long __init mpc86xx_time_init(void) { unsigned int temp; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index f38c50b4ce5..87454c52697 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -45,7 +45,6 @@ source "arch/powerpc/platforms/powermac/Kconfig" source "arch/powerpc/platforms/prep/Kconfig" source "arch/powerpc/platforms/maple/Kconfig" source "arch/powerpc/platforms/pasemi/Kconfig" -source "arch/powerpc/platforms/celleb/Kconfig" source "arch/powerpc/platforms/ps3/Kconfig" source "arch/powerpc/platforms/cell/Kconfig" source "arch/powerpc/platforms/8xx/Kconfig" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 5fc7fac10e9..f7efaa925a1 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -220,8 +220,8 @@ config SMP If you don't know what to do here, say N. config NR_CPUS - int "Maximum number of CPUs (2-128)" - range 2 128 + int "Maximum number of CPUs (2-1024)" + range 2 1024 depends on SMP default "32" if PPC64 default "4" diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index a984894466d..423a0234dc3 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -24,5 +24,4 @@ obj-$(CONFIG_PPC_MAPLE) += maple/ obj-$(CONFIG_PPC_PASEMI) += pasemi/ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ -obj-$(CONFIG_PPC_CELLEB) += celleb/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 2f169991896..3959fcfe731 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -25,6 +25,19 @@ config PPC_IBM_CELL_BLADE select PPC_UDBG_16550 select UDBG_RTAS_CONSOLE +config PPC_CELLEB + bool "Toshiba's Cell Reference Set 'Celleb' Architecture" + depends on PPC_MULTIPLATFORM && PPC64 + select PPC_CELL + select PPC_CELL_NATIVE + select PPC_RTAS + select PPC_INDIRECT_IO + select PPC_OF_PLATFORM_PCI + select HAS_TXX9_SERIAL + select PPC_UDBG_BEAT + select USB_OHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_MMIO + menu "Cell Broadband Engine options" depends on PPC_CELL diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index c89964c6fb1..c2a7e4e5ddf 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \ cbe_regs.o spider-pic.o \ - pervasive.o pmu.o io-workarounds.o + pervasive.o pmu.o io-workarounds.o \ + spider-pci.o obj-$(CONFIG_CBE_RAS) += ras.o obj-$(CONFIG_CBE_THERM) += cbe_thermal.o @@ -26,3 +27,20 @@ obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ spufs/ obj-$(CONFIG_PCI_MSI) += axon_msi.o + + +# celleb stuff +ifeq ($(CONFIG_PPC_CELLEB),y) +obj-y += celleb_setup.o \ + celleb_pci.o celleb_scc_epci.o \ + celleb_scc_pciex.o \ + celleb_scc_uhc.o \ + io-workarounds.o spider-pci.o \ + beat.o beat_htab.o beat_hvCall.o \ + beat_interrupt.o beat_iommu.o + +obj-$(CONFIG_SMP) += beat_smp.o +obj-$(CONFIG_PPC_UDBG_BEAT) += beat_udbg.o +obj-$(CONFIG_SERIAL_TXX9) += celleb_scc_sio.o +obj-$(CONFIG_SPU_BASE) += beat_spu_priv1.o +endif diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index d95e71dee91..c39f5c225f2 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -123,7 +123,7 @@ static struct axon_msic *find_msi_translator(struct pci_dev *dev) return NULL; } - for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { + for (; dn; dn = of_get_next_parent(dn)) { ph = of_get_property(dn, "msi-translator", NULL); if (ph) break; @@ -169,7 +169,7 @@ static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type) static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) { - struct device_node *dn, *tmp; + struct device_node *dn; struct msi_desc *entry; int len; const u32 *prop; @@ -182,7 +182,7 @@ static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) entry = list_first_entry(&dev->msi_list, struct msi_desc, list); - for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { + for (; dn; dn = of_get_next_parent(dn)) { if (entry->msi_attrib.is_64) { prop = of_get_property(dn, "msi-address-64", &len); if (prop) diff --git a/arch/powerpc/platforms/celleb/beat.c b/arch/powerpc/platforms/cell/beat.c index b64b171f245..48c690ea65d 100644 --- a/arch/powerpc/platforms/celleb/beat.c +++ b/arch/powerpc/platforms/cell/beat.c @@ -33,7 +33,7 @@ #include "beat_wrapper.h" #include "beat.h" -#include "interrupt.h" +#include "beat_interrupt.h" static int beat_pm_poweroff_flag; diff --git a/arch/powerpc/platforms/celleb/beat.h b/arch/powerpc/platforms/cell/beat.h index 32c8efcedc8..32c8efcedc8 100644 --- a/arch/powerpc/platforms/celleb/beat.h +++ b/arch/powerpc/platforms/cell/beat.h diff --git a/arch/powerpc/platforms/celleb/htab.c b/arch/powerpc/platforms/cell/beat_htab.c index 81467ff055c..81467ff055c 100644 --- a/arch/powerpc/platforms/celleb/htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c diff --git a/arch/powerpc/platforms/celleb/hvCall.S b/arch/powerpc/platforms/cell/beat_hvCall.S index 74c81744894..74c81744894 100644 --- a/arch/powerpc/platforms/celleb/hvCall.S +++ b/arch/powerpc/platforms/cell/beat_hvCall.S diff --git a/arch/powerpc/platforms/celleb/interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index 69562a86787..192a9350937 100644 --- a/arch/powerpc/platforms/celleb/interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -26,7 +26,7 @@ #include <asm/machdep.h> -#include "interrupt.h" +#include "beat_interrupt.h" #include "beat_wrapper.h" #define MAX_IRQS NR_IRQS diff --git a/arch/powerpc/platforms/celleb/interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h index b470fd0051f..b470fd0051f 100644 --- a/arch/powerpc/platforms/celleb/interrupt.h +++ b/arch/powerpc/platforms/cell/beat_interrupt.h diff --git a/arch/powerpc/platforms/celleb/iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c index 93b0efddd65..93b0efddd65 100644 --- a/arch/powerpc/platforms/celleb/iommu.c +++ b/arch/powerpc/platforms/cell/beat_iommu.c diff --git a/arch/powerpc/platforms/celleb/smp.c b/arch/powerpc/platforms/cell/beat_smp.c index a7631250aeb..26efc204c47 100644 --- a/arch/powerpc/platforms/celleb/smp.c +++ b/arch/powerpc/platforms/cell/beat_smp.c @@ -37,7 +37,7 @@ #include <asm/machdep.h> #include <asm/udbg.h> -#include "interrupt.h" +#include "beat_interrupt.h" #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) diff --git a/arch/powerpc/platforms/celleb/spu_priv1.c b/arch/powerpc/platforms/cell/beat_spu_priv1.c index bcc17f7fe8a..bcc17f7fe8a 100644 --- a/arch/powerpc/platforms/celleb/spu_priv1.c +++ b/arch/powerpc/platforms/cell/beat_spu_priv1.c diff --git a/arch/powerpc/platforms/celleb/beat_syscall.h b/arch/powerpc/platforms/cell/beat_syscall.h index 8580dc7e179..8580dc7e179 100644 --- a/arch/powerpc/platforms/celleb/beat_syscall.h +++ b/arch/powerpc/platforms/cell/beat_syscall.h diff --git a/arch/powerpc/platforms/celleb/udbg_beat.c b/arch/powerpc/platforms/cell/beat_udbg.c index 6b418f6b617..6b418f6b617 100644 --- a/arch/powerpc/platforms/celleb/udbg_beat.c +++ b/arch/powerpc/platforms/cell/beat_udbg.c diff --git a/arch/powerpc/platforms/celleb/beat_wrapper.h b/arch/powerpc/platforms/cell/beat_wrapper.h index b47dfda48d0..b47dfda48d0 100644 --- a/arch/powerpc/platforms/celleb/beat_wrapper.h +++ b/arch/powerpc/platforms/cell/beat_wrapper.h diff --git a/arch/powerpc/platforms/celleb/pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 51b390d34e4..f39a3b2a166 100644 --- a/arch/powerpc/platforms/celleb/pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -37,12 +37,11 @@ #include <asm/io.h> #include <asm/irq.h> #include <asm/prom.h> -#include <asm/machdep.h> #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> -#include "pci.h" -#include "interrupt.h" +#include "io-workarounds.h" +#include "celleb_pci.h" #define MAX_PCI_DEVICES 32 #define MAX_PCI_FUNCTIONS 8 @@ -190,7 +189,7 @@ static int celleb_fake_pci_read_config(struct pci_bus *bus, static int celleb_fake_pci_write_config(struct pci_bus *bus, - unsigned int devfn, int where, int size, u32 val) + unsigned int devfn, int where, int size, u32 val) { char *config; struct device_node *node; @@ -457,33 +456,42 @@ static int __init celleb_setup_fake_pci(struct device_node *dev, return 0; } -void __init fake_pci_workaround_init(struct pci_controller *phb) -{ - /** - * We will add fake pci bus to scc_pci_bus for the purpose to improve - * I/O Macro performance. But device-tree and device drivers - * are not ready to use address with a token. - */ - - /* celleb_pci_add_one(phb, NULL); */ -} +static struct celleb_phb_spec celleb_fake_pci_spec __initdata = { + .setup = celleb_setup_fake_pci, +}; static struct of_device_id celleb_phb_match[] __initdata = { { .name = "pci-pseudo", - .data = celleb_setup_fake_pci, + .data = &celleb_fake_pci_spec, }, { .name = "epci", - .data = celleb_setup_epci, + .data = &celleb_epci_spec, + }, { + .name = "pcie", + .data = &celleb_pciex_spec, }, { }, }; +static int __init celleb_io_workaround_init(struct pci_controller *phb, + struct celleb_phb_spec *phb_spec) +{ + if (phb_spec->ops) { + iowa_register_bus(phb, phb_spec->ops, phb_spec->iowa_init, + phb_spec->iowa_data); + io_workaround_init(); + } + + return 0; +} + int __init celleb_setup_phb(struct pci_controller *phb) { struct device_node *dev = phb->dn; const struct of_device_id *match; - int (*setup_func)(struct device_node *, struct pci_controller *); + struct celleb_phb_spec *phb_spec; + int rc; match = of_match_node(celleb_phb_match, dev); if (!match) @@ -492,8 +500,12 @@ int __init celleb_setup_phb(struct pci_controller *phb) phb_set_bus_ranges(dev, phb); phb->buid = 1; - setup_func = match->data; - return (*setup_func)(dev, phb); + phb_spec = match->data; + rc = (*phb_spec->setup)(dev, phb); + if (rc) + return 1; + + return celleb_io_workaround_init(phb, phb_spec); } int celleb_pci_probe_mode(struct pci_bus *bus) diff --git a/arch/powerpc/platforms/celleb/pci.h b/arch/powerpc/platforms/cell/celleb_pci.h index 5d5544ffedd..4cba1523ec5 100644 --- a/arch/powerpc/platforms/celleb/pci.h +++ b/arch/powerpc/platforms/cell/celleb_pci.h @@ -27,16 +27,19 @@ #include <asm/prom.h> #include <asm/ppc-pci.h> +#include "io-workarounds.h" + +struct celleb_phb_spec { + int (*setup)(struct device_node *, struct pci_controller *); + struct ppc_pci_io *ops; + int (*iowa_init)(struct iowa_bus *, void *); + void *iowa_data; +}; + extern int celleb_setup_phb(struct pci_controller *); extern int celleb_pci_probe_mode(struct pci_bus *); -extern int celleb_setup_epci(struct device_node *, struct pci_controller *); - -extern void *celleb_dummy_page_va; -extern int __init celleb_pci_workaround_init(void); -extern void __init celleb_pci_add_one(struct pci_controller *, - void (*)(struct pci_controller *)); -extern void fake_pci_workaround_init(struct pci_controller *); -extern void epci_workaround_init(struct pci_controller *); +extern struct celleb_phb_spec celleb_epci_spec; +extern struct celleb_phb_spec celleb_pciex_spec; #endif /* _CELLEB_PCI_H */ diff --git a/arch/powerpc/platforms/celleb/scc.h b/arch/powerpc/platforms/cell/celleb_scc.h index 6be1542a6e6..b596a711c34 100644 --- a/arch/powerpc/platforms/celleb/scc.h +++ b/arch/powerpc/platforms/cell/celleb_scc.h @@ -125,6 +125,93 @@ /* bits for SCC_EPCI_CNTOPT */ #define SCC_EPCI_CNTOPT_O2PMB 0x00000002 +/* SCC PCIEXC SMMIO registers */ +#define PEXCADRS 0x000 +#define PEXCWDATA 0x004 +#define PEXCRDATA 0x008 +#define PEXDADRS 0x010 +#define PEXDCMND 0x014 +#define PEXDWDATA 0x018 +#define PEXDRDATA 0x01c +#define PEXREQID 0x020 +#define PEXTIDMAP 0x024 +#define PEXINTMASK 0x028 +#define PEXINTSTS 0x02c +#define PEXAERRMASK 0x030 +#define PEXAERRSTS 0x034 +#define PEXPRERRMASK 0x040 +#define PEXPRERRSTS 0x044 +#define PEXPRERRID01 0x048 +#define PEXPRERRID23 0x04c +#define PEXVDMASK 0x050 +#define PEXVDSTS 0x054 +#define PEXRCVCPLIDA 0x060 +#define PEXLENERRIDA 0x068 +#define PEXPHYPLLST 0x070 +#define PEXDMRDEN0 0x100 +#define PEXDMRDADR0 0x104 +#define PEXDMRDENX 0x110 +#define PEXDMRDADRX 0x114 +#define PEXECMODE 0xf00 +#define PEXMAEA(n) (0xf50 + (8 * n)) +#define PEXMAEC(n) (0xf54 + (8 * n)) +#define PEXCCRCTRL 0xff0 + +/* SCC PCIEXC bits and shifts for PEXCADRS */ +#define PEXCADRS_BYTE_EN_SHIFT 20 +#define PEXCADRS_CMD_SHIFT 16 +#define PEXCADRS_CMD_READ (0xa << PEXCADRS_CMD_SHIFT) +#define PEXCADRS_CMD_WRITE (0xb << PEXCADRS_CMD_SHIFT) + +/* SCC PCIEXC shifts for PEXDADRS */ +#define PEXDADRS_BUSNO_SHIFT 20 +#define PEXDADRS_DEVNO_SHIFT 15 +#define PEXDADRS_FUNCNO_SHIFT 12 + +/* SCC PCIEXC bits and shifts for PEXDCMND */ +#define PEXDCMND_BYTE_EN_SHIFT 4 +#define PEXDCMND_IO_READ 0x2 +#define PEXDCMND_IO_WRITE 0x3 +#define PEXDCMND_CONFIG_READ 0xa +#define PEXDCMND_CONFIG_WRITE 0xb + +/* SCC PCIEXC bits for PEXPHYPLLST */ +#define PEXPHYPLLST_PEXPHYAPLLST 0x00000001 + +/* SCC PCIEXC bits for PEXECMODE */ +#define PEXECMODE_ALL_THROUGH 0x00000000 +#define PEXECMODE_ALL_8BIT 0x00550155 +#define PEXECMODE_ALL_16BIT 0x00aa02aa + +/* SCC PCIEXC bits for PEXCCRCTRL */ +#define PEXCCRCTRL_PEXIPCOREEN 0x00040000 +#define PEXCCRCTRL_PEXIPCONTEN 0x00020000 +#define PEXCCRCTRL_PEXPHYPLLEN 0x00010000 +#define PEXCCRCTRL_PCIEXCAOCKEN 0x00000100 + +/* SCC PCIEXC port configuration registers */ +#define PEXTCERRCHK 0x21c +#define PEXTAMAPB0 0x220 +#define PEXTAMAPL0 0x224 +#define PEXTAMAPB(n) (PEXTAMAPB0 + 8 * (n)) +#define PEXTAMAPL(n) (PEXTAMAPL0 + 8 * (n)) +#define PEXCHVC0P 0x500 +#define PEXCHVC0NP 0x504 +#define PEXCHVC0C 0x508 +#define PEXCDVC0P 0x50c +#define PEXCDVC0NP 0x510 +#define PEXCDVC0C 0x514 +#define PEXCHVCXP 0x518 +#define PEXCHVCXNP 0x51c +#define PEXCHVCXC 0x520 +#define PEXCDVCXP 0x524 +#define PEXCDVCXNP 0x528 +#define PEXCDVCXC 0x52c +#define PEXCTTRG 0x530 +#define PEXTSCTRL 0x700 +#define PEXTSSTS 0x704 +#define PEXSKPCTRL 0x708 + /* UHC registers */ #define SCC_UHC_CKRCTRL 0xff0 #define SCC_UHC_ECMODE 0xf00 diff --git a/arch/powerpc/platforms/celleb/scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c index a999b393f6f..08c285b10e3 100644 --- a/arch/powerpc/platforms/celleb/scc_epci.c +++ b/arch/powerpc/platforms/cell/celleb_scc_epci.c @@ -30,23 +30,17 @@ #include <asm/io.h> #include <asm/irq.h> #include <asm/prom.h> -#include <asm/machdep.h> #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> -#include "scc.h" -#include "pci.h" -#include "interrupt.h" +#include "celleb_scc.h" +#include "celleb_pci.h" #define MAX_PCI_DEVICES 32 #define MAX_PCI_FUNCTIONS 8 #define iob() __asm__ __volatile__("eieio; sync":::"memory") -struct epci_private { - dma_addr_t dummy_page_da; -}; - static inline PCI_IO_ADDR celleb_epci_get_epci_base( struct pci_controller *hose) { @@ -71,42 +65,6 @@ static inline PCI_IO_ADDR celleb_epci_get_epci_cfg( return hose->cfg_data; } -static void scc_epci_dummy_read(struct pci_controller *hose) -{ - PCI_IO_ADDR epci_base; - u32 val; - - epci_base = celleb_epci_get_epci_base(hose); - - val = in_be32(epci_base + SCC_EPCI_WATRP); - iosync(); - - return; -} - -void __init epci_workaround_init(struct pci_controller *hose) -{ - PCI_IO_ADDR epci_base; - PCI_IO_ADDR reg; - struct epci_private *private = hose->private_data; - - BUG_ON(!private); - - private->dummy_page_da = dma_map_single(hose->parent, - celleb_dummy_page_va, PAGE_SIZE, DMA_FROM_DEVICE); - if (private->dummy_page_da == DMA_ERROR_CODE) { - printk(KERN_ERR "EPCI: dummy read disabled. " - "Map dummy page failed.\n"); - return; - } - - celleb_pci_add_one(hose, scc_epci_dummy_read); - epci_base = celleb_epci_get_epci_base(hose); - - reg = epci_base + SCC_EPCI_DUMYRADR; - out_be32(reg, private->dummy_page_da); -} - static inline void clear_and_disable_master_abort_interrupt( struct pci_controller *hose) { @@ -151,10 +109,8 @@ static int celleb_epci_check_abort(struct pci_controller *hose, return PCIBIOS_SUCCESSFUL; } -static PCI_IO_ADDR celleb_epci_make_config_addr( - struct pci_bus *bus, - struct pci_controller *hose, - unsigned int devfn, int where) +static PCI_IO_ADDR celleb_epci_make_config_addr(struct pci_bus *bus, + struct pci_controller *hose, unsigned int devfn, int where) { PCI_IO_ADDR addr; @@ -425,8 +381,8 @@ static int __init celleb_epci_init(struct pci_controller *hose) return 0; } -int __init celleb_setup_epci(struct device_node *node, - struct pci_controller *hose) +static int __init celleb_setup_epci(struct device_node *node, + struct pci_controller *hose) { struct resource r; @@ -450,8 +406,7 @@ int __init celleb_setup_epci(struct device_node *node, if (!hose->cfg_addr) goto error; pr_debug("EPCI: cfg_addr map 0x%016lx->0x%016lx + 0x%016lx\n", - r.start, (unsigned long)hose->cfg_addr, - (r.end - r.start + 1)); + r.start, (unsigned long)hose->cfg_addr, (r.end - r.start + 1)); if (of_address_to_resource(node, 2, &r)) goto error; @@ -459,14 +414,7 @@ int __init celleb_setup_epci(struct device_node *node, if (!hose->cfg_data) goto error; pr_debug("EPCI: cfg_data map 0x%016lx->0x%016lx + 0x%016lx\n", - r.start, (unsigned long)hose->cfg_data, - (r.end - r.start + 1)); - - hose->private_data = kzalloc(sizeof(struct epci_private), GFP_KERNEL); - if (hose->private_data == NULL) { - printk(KERN_ERR "EPCI: no memory for private data.\n"); - goto error; - } + r.start, (unsigned long)hose->cfg_data, (r.end - r.start + 1)); hose->ops = &celleb_epci_ops; celleb_epci_init(hose); @@ -474,8 +422,6 @@ int __init celleb_setup_epci(struct device_node *node, return 0; error: - kfree(hose->private_data); - if (hose->cfg_addr) iounmap(hose->cfg_addr); @@ -483,3 +429,10 @@ error: iounmap(hose->cfg_data); return 1; } + +struct celleb_phb_spec celleb_epci_spec __initdata = { + .setup = celleb_setup_epci, + .ops = &spiderpci_ops, + .iowa_init = &spiderpci_iowa_init, + .iowa_data = (void *)0, +}; diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c new file mode 100644 index 00000000000..31da84c458d --- /dev/null +++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -0,0 +1,547 @@ +/* + * Support for Celleb PCI-Express. + * + * (C) Copyright 2007-2008 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/delay.h> +#include <linux/interrupt.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/iommu.h> +#include <asm/byteorder.h> + +#include "celleb_scc.h" +#include "celleb_pci.h" + +#define PEX_IN(base, off) in_be32((void __iomem *)(base) + (off)) +#define PEX_OUT(base, off, data) out_be32((void __iomem *)(base) + (off), (data)) + +static void scc_pciex_io_flush(struct iowa_bus *bus) +{ + (void)PEX_IN(bus->phb->cfg_addr, PEXDMRDEN0); +} + +/* + * Memory space access to device on PCIEX + */ +#define PCIEX_MMIO_READ(name, ret) \ +static ret scc_pciex_##name(const PCI_IO_ADDR addr) \ +{ \ + ret val = __do_##name(addr); \ + scc_pciex_io_flush(iowa_mem_find_bus(addr)); \ + return val; \ +} + +#define PCIEX_MMIO_READ_STR(name) \ +static void scc_pciex_##name(const PCI_IO_ADDR addr, void *buf, \ + unsigned long count) \ +{ \ + __do_##name(addr, buf, count); \ + scc_pciex_io_flush(iowa_mem_find_bus(addr)); \ +} + +PCIEX_MMIO_READ(readb, u8) +PCIEX_MMIO_READ(readw, u16) +PCIEX_MMIO_READ(readl, u32) +PCIEX_MMIO_READ(readq, u64) +PCIEX_MMIO_READ(readw_be, u16) +PCIEX_MMIO_READ(readl_be, u32) +PCIEX_MMIO_READ(readq_be, u64) +PCIEX_MMIO_READ_STR(readsb) +PCIEX_MMIO_READ_STR(readsw) +PCIEX_MMIO_READ_STR(readsl) + +static void scc_pciex_memcpy_fromio(void *dest, const PCI_IO_ADDR src, + unsigned long n) +{ + __do_memcpy_fromio(dest, src, n); + scc_pciex_io_flush(iowa_mem_find_bus(src)); +} + +/* + * I/O port access to devices on PCIEX. + */ + +static inline unsigned long get_bus_address(struct pci_controller *phb, + unsigned long port) +{ + return port - ((unsigned long)(phb->io_base_virt) - _IO_BASE); +} + +static u32 scc_pciex_read_port(struct pci_controller *phb, + unsigned long port, int size) +{ + unsigned int byte_enable; + unsigned int cmd, shift; + unsigned long addr; + u32 data, ret; + + BUG_ON(((port & 0x3ul) + size) > 4); + + addr = get_bus_address(phb, port); + shift = addr & 0x3ul; + byte_enable = ((1 << size) - 1) << shift; + cmd = PEXDCMND_IO_READ | (byte_enable << PEXDCMND_BYTE_EN_SHIFT); + PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul)); + PEX_OUT(phb->cfg_addr, PEXDCMND, cmd); + data = PEX_IN(phb->cfg_addr, PEXDRDATA); + ret = (data >> (shift * 8)) & (0xFFFFFFFF >> ((4 - size) * 8)); + + pr_debug("PCIEX:PIO READ:port=0x%lx, addr=0x%lx, size=%d, be=%x," + " cmd=%x, data=%x, ret=%x\n", port, addr, size, byte_enable, + cmd, data, ret); + + return ret; +} + +static void scc_pciex_write_port(struct pci_controller *phb, + unsigned long port, int size, u32 val) +{ + unsigned int byte_enable; + unsigned int cmd, shift; + unsigned long addr; + u32 data; + + BUG_ON(((port & 0x3ul) + size) > 4); + + addr = get_bus_address(phb, port); + shift = addr & 0x3ul; + byte_enable = ((1 << size) - 1) << shift; + cmd = PEXDCMND_IO_WRITE | (byte_enable << PEXDCMND_BYTE_EN_SHIFT); + data = (val & (0xFFFFFFFF >> (4 - size) * 8)) << (shift * 8); + PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul)); + PEX_OUT(phb->cfg_addr, PEXDCMND, cmd); + PEX_OUT(phb->cfg_addr, PEXDWDATA, data); + + pr_debug("PCIEX:PIO WRITE:port=0x%lx, addr=%lx, size=%d, val=%x," + " be=%x, cmd=%x, data=%x\n", port, addr, size, val, + byte_enable, cmd, data); +} + +static u8 __scc_pciex_inb(struct pci_controller *phb, unsigned long port) +{ + return (u8)scc_pciex_read_port(phb, port, 1); +} + +static u16 __scc_pciex_inw(struct pci_controller *phb, unsigned long port) +{ + u32 data; + if ((port & 0x3ul) < 3) + data = scc_pciex_read_port(phb, port, 2); + else { + u32 d1 = scc_pciex_read_port(phb, port, 1); + u32 d2 = scc_pciex_read_port(phb, port + 1, 1); + data = d1 | (d2 << 8); + } + return (u16)data; +} + +static u32 __scc_pciex_inl(struct pci_controller *phb, unsigned long port) +{ + unsigned int mod = port & 0x3ul; + u32 data; + if (mod == 0) + data = scc_pciex_read_port(phb, port, 4); + else { + u32 d1 = scc_pciex_read_port(phb, port, 4 - mod); + u32 d2 = scc_pciex_read_port(phb, port + 1, mod); + data = d1 | (d2 << (mod * 8)); + } + return data; +} + +static void __scc_pciex_outb(struct pci_controller *phb, + u8 val, unsigned long port) +{ + scc_pciex_write_port(phb, port, 1, (u32)val); +} + +static void __scc_pciex_outw(struct pci_controller *phb, + u16 val, unsigned long port) +{ + if ((port & 0x3ul) < 3) + scc_pciex_write_port(phb, port, 2, (u32)val); + else { + u32 d1 = val & 0x000000FF; + u32 d2 = (val & 0x0000FF00) >> 8; + scc_pciex_write_port(phb, port, 1, d1); + scc_pciex_write_port(phb, port + 1, 1, d2); + } +} + +static void __scc_pciex_outl(struct pci_controller *phb, + u32 val, unsigned long port) +{ + unsigned int mod = port & 0x3ul; + if (mod == 0) + scc_pciex_write_port(phb, port, 4, val); + else { + u32 d1 = val & (0xFFFFFFFFul >> (mod * 8)); + u32 d2 = val >> ((4 - mod) * 8); + scc_pciex_write_port(phb, port, 4 - mod, d1); + scc_pciex_write_port(phb, port + 1, mod, d2); + } +} + +#define PCIEX_PIO_FUNC(size, name) \ +static u##size scc_pciex_in##name(unsigned long port) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(port); \ + u##size data = __scc_pciex_in##name(bus->phb, port); \ + scc_pciex_io_flush(bus); \ + return data; \ +} \ +static void scc_pciex_ins##name(unsigned long p, void *b, unsigned long c) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(p); \ + u##size *dst = b; \ + for (; c != 0; c--, dst++) \ + *dst = cpu_to_le##size(__scc_pciex_in##name(bus->phb, p)); \ + scc_pciex_io_flush(bus); \ +} \ +static void scc_pciex_out##name(u##size val, unsigned long port) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(port); \ + __scc_pciex_out##name(bus->phb, val, port); \ +} \ +static void scc_pciex_outs##name(unsigned long p, const void *b, \ + unsigned long c) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(p); \ + const u##size *src = b; \ + for (; c != 0; c--, src++) \ + __scc_pciex_out##name(bus->phb, le##size##_to_cpu(*src), p); \ +} +#define cpu_to_le8(x) (x) +#define le8_to_cpu(x) (x) +PCIEX_PIO_FUNC(8, b) +PCIEX_PIO_FUNC(16, w) +PCIEX_PIO_FUNC(32, l) + +static struct ppc_pci_io scc_pciex_ops = { + .readb = scc_pciex_readb, + .readw = scc_pciex_readw, + .readl = scc_pciex_readl, + .readq = scc_pciex_readq, + .readw_be = scc_pciex_readw_be, + .readl_be = scc_pciex_readl_be, + .readq_be = scc_pciex_readq_be, + .readsb = scc_pciex_readsb, + .readsw = scc_pciex_readsw, + .readsl = scc_pciex_readsl, + .memcpy_fromio = scc_pciex_memcpy_fromio, + .inb = scc_pciex_inb, + .inw = scc_pciex_inw, + .inl = scc_pciex_inl, + .outb = scc_pciex_outb, + .outw = scc_pciex_outw, + .outl = scc_pciex_outl, + .insb = scc_pciex_insb, + .insw = scc_pciex_insw, + .insl = scc_pciex_insl, + .outsb = scc_pciex_outsb, + .outsw = scc_pciex_outsw, + .outsl = scc_pciex_outsl, +}; + +static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data) +{ + dma_addr_t dummy_page_da; + void *dummy_page_va; + + dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!dummy_page_va) { + pr_err("PCIEX:Alloc dummy_page_va failed\n"); + return -1; + } + + dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va, + PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dummy_page_da)) { + pr_err("PCIEX:Map dummy page failed.\n"); + kfree(dummy_page_va); + return -1; + } + + PEX_OUT(bus->phb->cfg_addr, PEXDMRDADR0, dummy_page_da); + + return 0; +} + +/* + * config space access + */ +#define MK_PEXDADRS(bus_no, dev_no, func_no, addr) \ + ((uint32_t)(((addr) & ~0x3UL) | \ + ((bus_no) << PEXDADRS_BUSNO_SHIFT) | \ + ((dev_no) << PEXDADRS_DEVNO_SHIFT) | \ + ((func_no) << PEXDADRS_FUNCNO_SHIFT))) + +#define MK_PEXDCMND_BYTE_EN(addr, size) \ + ((((0x1 << (size))-1) << ((addr) & 0x3)) << PEXDCMND_BYTE_EN_SHIFT) +#define MK_PEXDCMND(cmd, addr, size) ((cmd) | MK_PEXDCMND_BYTE_EN(addr, size)) + +static uint32_t config_read_pciex_dev(unsigned int __iomem *base, + uint64_t bus_no, uint64_t dev_no, uint64_t func_no, + uint64_t off, uint64_t size) +{ + uint32_t ret; + uint32_t addr, cmd; + + addr = MK_PEXDADRS(bus_no, dev_no, func_no, off); + cmd = MK_PEXDCMND(PEXDCMND_CONFIG_READ, off, size); + PEX_OUT(base, PEXDADRS, addr); + PEX_OUT(base, PEXDCMND, cmd); + ret = (PEX_IN(base, PEXDRDATA) + >> ((off & (4-size)) * 8)) & ((0x1 << (size * 8)) - 1); + return ret; +} + +static void config_write_pciex_dev(unsigned int __iomem *base, uint64_t bus_no, + uint64_t dev_no, uint64_t func_no, uint64_t off, uint64_t size, + uint32_t data) +{ + uint32_t addr, cmd; + + addr = MK_PEXDADRS(bus_no, dev_no, func_no, off); + cmd = MK_PEXDCMND(PEXDCMND_CONFIG_WRITE, off, size); + PEX_OUT(base, PEXDADRS, addr); + PEX_OUT(base, PEXDCMND, cmd); + PEX_OUT(base, PEXDWDATA, + (data & ((0x1 << (size * 8)) - 1)) << ((off & (4-size)) * 8)); +} + +#define MK_PEXCADRS_BYTE_EN(off, len) \ + ((((0x1 << (len)) - 1) << ((off) & 0x3)) << PEXCADRS_BYTE_EN_SHIFT) +#define MK_PEXCADRS(cmd, addr, size) \ + ((cmd) | MK_PEXCADRS_BYTE_EN(addr, size) | ((addr) & ~0x3)) +static uint32_t config_read_pciex_rc(unsigned int __iomem *base, + uint32_t where, uint32_t size) +{ + PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_READ, where, size)); + return (PEX_IN(base, PEXCRDATA) + >> ((where & (4 - size)) * 8)) & ((0x1 << (size * 8)) - 1); +} + +static void config_write_pciex_rc(unsigned int __iomem *base, uint32_t where, + uint32_t size, uint32_t val) +{ + uint32_t data; + + data = (val & ((0x1 << (size * 8)) - 1)) << ((where & (4 - size)) * 8); + PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_WRITE, where, size)); + PEX_OUT(base, PEXCWDATA, data); +} + +/* Interfaces */ +/* Note: Work-around + * On SCC PCIEXC, one device is seen on all 32 dev_no. + * As SCC PCIEXC can have only one device on the bus, we look only one dev_no. + * (dev_no = 1) + */ +static int scc_pciex_read_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, unsigned int *val) +{ + struct device_node *dn; + struct pci_controller *phb; + + dn = bus->sysdata; + phb = pci_find_hose_for_OF_device(dn); + + if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) { + *val = ~0; + return PCIBIOS_DEVICE_NOT_FOUND; + } + + if (bus->number == 0 && PCI_SLOT(devfn) == 0) + *val = config_read_pciex_rc(phb->cfg_addr, where, size); + else + *val = config_read_pciex_dev(phb->cfg_addr, bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size); + + return PCIBIOS_SUCCESSFUL; +} + +static int scc_pciex_write_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, unsigned int val) +{ + struct device_node *dn; + struct pci_controller *phb; + + dn = bus->sysdata; + phb = pci_find_hose_for_OF_device(dn); + + if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (bus->number == 0 && PCI_SLOT(devfn) == 0) + config_write_pciex_rc(phb->cfg_addr, where, size, val); + else + config_write_pciex_dev(phb->cfg_addr, bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops scc_pciex_pci_ops = { + scc_pciex_read_config, + scc_pciex_write_config, +}; + +static void pciex_clear_intr_all(unsigned int __iomem *base) +{ + PEX_OUT(base, PEXAERRSTS, 0xffffffff); + PEX_OUT(base, PEXPRERRSTS, 0xffffffff); + PEX_OUT(base, PEXINTSTS, 0xffffffff); +} + +#if 0 +static void pciex_disable_intr_all(unsigned int *base) +{ + PEX_OUT(base, PEXINTMASK, 0x0); + PEX_OUT(base, PEXAERRMASK, 0x0); + PEX_OUT(base, PEXPRERRMASK, 0x0); + PEX_OUT(base, PEXVDMASK, 0x0); +} +#endif + +static void pciex_enable_intr_all(unsigned int __iomem *base) +{ + PEX_OUT(base, PEXINTMASK, 0x0000e7f1); + PEX_OUT(base, PEXAERRMASK, 0x03ff01ff); + PEX_OUT(base, PEXPRERRMASK, 0x0001010f); + PEX_OUT(base, PEXVDMASK, 0x00000001); +} + +static void pciex_check_status(unsigned int __iomem *base) +{ + uint32_t err = 0; + uint32_t intsts, aerr, prerr, rcvcp, lenerr; + uint32_t maea, maec; + + intsts = PEX_IN(base, PEXINTSTS); + aerr = PEX_IN(base, PEXAERRSTS); + prerr = PEX_IN(base, PEXPRERRSTS); + rcvcp = PEX_IN(base, PEXRCVCPLIDA); + lenerr = PEX_IN(base, PEXLENERRIDA); + + if (intsts || aerr || prerr || rcvcp || lenerr) + err = 1; + + pr_info("PCEXC interrupt!!\n"); + pr_info("PEXINTSTS :0x%08x\n", intsts); + pr_info("PEXAERRSTS :0x%08x\n", aerr); + pr_info("PEXPRERRSTS :0x%08x\n", prerr); + pr_info("PEXRCVCPLIDA :0x%08x\n", rcvcp); + pr_info("PEXLENERRIDA :0x%08x\n", lenerr); + + /* print detail of Protection Error */ + if (intsts & 0x00004000) { + uint32_t i, n; + for (i = 0; i < 4; i++) { + n = 1 << i; + if (prerr & n) { + maea = PEX_IN(base, PEXMAEA(i)); + maec = PEX_IN(base, PEXMAEC(i)); + pr_info("PEXMAEC%d :0x%08x\n", i, maec); + pr_info("PEXMAEA%d :0x%08x\n", i, maea); + } + } + } + + if (err) + pciex_clear_intr_all(base); +} + +static irqreturn_t pciex_handle_internal_irq(int irq, void *dev_id) +{ + struct pci_controller *phb = dev_id; + + pr_debug("PCIEX:pciex_handle_internal_irq(irq=%d)\n", irq); + + BUG_ON(phb->cfg_addr == NULL); + + pciex_check_status(phb->cfg_addr); + + return IRQ_HANDLED; +} + +static __init int celleb_setup_pciex(struct device_node *node, + struct pci_controller *phb) +{ + struct resource r; + struct of_irq oirq; + int virq; + + /* SMMIO registers; used inside this file */ + if (of_address_to_resource(node, 0, &r)) { + pr_err("PCIEXC:Failed to get config resource.\n"); + return 1; + } + phb->cfg_addr = ioremap(r.start, r.end - r.start + 1); + if (!phb->cfg_addr) { + pr_err("PCIEXC:Failed to remap SMMIO region.\n"); + return 1; + } + + /* Not use cfg_data, cmd and data regs are near address reg */ + phb->cfg_data = NULL; + + /* set pci_ops */ + phb->ops = &scc_pciex_pci_ops; + + /* internal interrupt handler */ + if (of_irq_map_one(node, 1, &oirq)) { + pr_err("PCIEXC:Failed to map irq\n"); + goto error; + } + virq = irq_create_of_mapping(oirq.controller, oirq.specifier, + oirq.size); + if (request_irq(virq, pciex_handle_internal_irq, + IRQF_DISABLED, "pciex", (void *)phb)) { + pr_err("PCIEXC:Failed to request irq\n"); + goto error; + } + + /* enable all interrupts */ + pciex_clear_intr_all(phb->cfg_addr); + pciex_enable_intr_all(phb->cfg_addr); + /* MSI: TBD */ + + return 0; + +error: + phb->cfg_data = NULL; + if (phb->cfg_addr) + iounmap(phb->cfg_addr); + phb->cfg_addr = NULL; + return 1; +} + +struct celleb_phb_spec celleb_pciex_spec __initdata = { + .setup = celleb_setup_pciex, + .ops = &scc_pciex_ops, + .iowa_init = &scc_pciex_iowa_init, +}; diff --git a/arch/powerpc/platforms/celleb/scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c index 3a16c5b3c46..3a16c5b3c46 100644 --- a/arch/powerpc/platforms/celleb/scc_sio.c +++ b/arch/powerpc/platforms/cell/celleb_scc_sio.c diff --git a/arch/powerpc/platforms/celleb/scc_uhc.c b/arch/powerpc/platforms/cell/celleb_scc_uhc.c index cb430799408..d63b720bfe3 100644 --- a/arch/powerpc/platforms/celleb/scc_uhc.c +++ b/arch/powerpc/platforms/cell/celleb_scc_uhc.c @@ -25,7 +25,7 @@ #include <asm/io.h> #include <asm/machdep.h> -#include "scc.h" +#include "celleb_scc.h" #define UHC_RESET_WAIT_MAX 10000 diff --git a/arch/powerpc/platforms/celleb/setup.c b/arch/powerpc/platforms/cell/celleb_setup.c index f27ae1e3fb5..b11cb30decb 100644 --- a/arch/powerpc/platforms/celleb/setup.c +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -56,13 +56,13 @@ #include <asm/rtas.h> #include <asm/cell-regs.h> -#include "interrupt.h" +#include "beat_interrupt.h" #include "beat_wrapper.h" #include "beat.h" -#include "pci.h" -#include "../cell/interrupt.h" -#include "../cell/pervasive.h" -#include "../cell/ras.h" +#include "celleb_pci.h" +#include "interrupt.h" +#include "pervasive.h" +#include "ras.h" static char celleb_machine_type[128] = "Celleb"; @@ -114,8 +114,6 @@ static int __init celleb_publish_devices(void) /* Publish OF platform devices for southbridge IOs */ of_platform_bus_probe(NULL, celleb_bus_ids, NULL); - celleb_pci_workaround_init(); - return 0; } machine_device_initcall(celleb_beat, celleb_publish_devices); diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/platforms/cell/io-workarounds.c index 979d4b67efb..3b84e8be314 100644 --- a/arch/powerpc/platforms/cell/io-workarounds.c +++ b/arch/powerpc/platforms/cell/io-workarounds.c @@ -1,6 +1,9 @@ /* + * Support PCI IO workaround + * * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org> * IBM, Corp. + * (C) Copyright 2007-2008 TOSHIBA CORPORATION * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,335 +12,174 @@ #undef DEBUG #include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/pci.h> + #include <asm/io.h> #include <asm/machdep.h> -#include <asm/pci-bridge.h> +#include <asm/pgtable.h> #include <asm/ppc-pci.h> +#include "io-workarounds.h" -#define SPIDER_PCI_REG_BASE 0xd000 -#define SPIDER_PCI_VCI_CNTL_STAT 0x0110 -#define SPIDER_PCI_DUMMY_READ 0x0810 -#define SPIDER_PCI_DUMMY_READ_BASE 0x0814 +#define IOWA_MAX_BUS 8 -/* Undefine that to re-enable bogus prefetch - * - * Without that workaround, the chip will do bogus prefetch past - * page boundary from system memory. This setting will disable that, - * though the documentation is unclear as to the consequences of doing - * so, either purely performances, or possible misbehaviour... It's not - * clear wether the chip can handle unaligned accesses at all without - * prefetching enabled. - * - * For now, things appear to be behaving properly with that prefetching - * disabled and IDE, possibly because IDE isn't doing any unaligned - * access. - */ -#define SPIDER_DISABLE_PREFETCH +static struct iowa_bus iowa_busses[IOWA_MAX_BUS]; +static unsigned int iowa_bus_count; -#define MAX_SPIDERS 3 +static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) +{ + int i, j; + struct resource *res; + unsigned long vstart, vend; -static struct spider_pci_bus { - void __iomem *regs; - unsigned long mmio_start; - unsigned long mmio_end; - unsigned long pio_vstart; - unsigned long pio_vend; -} spider_pci_busses[MAX_SPIDERS]; -static int spider_pci_count; + for (i = 0; i < iowa_bus_count; i++) { + struct iowa_bus *bus = &iowa_busses[i]; + struct pci_controller *phb = bus->phb; -static struct spider_pci_bus *spider_pci_find(unsigned long vaddr, - unsigned long paddr) -{ - int i; - - for (i = 0; i < spider_pci_count; i++) { - struct spider_pci_bus *bus = &spider_pci_busses[i]; - if (paddr && paddr >= bus->mmio_start && paddr < bus->mmio_end) - return bus; - if (vaddr && vaddr >= bus->pio_vstart && vaddr < bus->pio_vend) - return bus; + if (vaddr) { + vstart = (unsigned long)phb->io_base_virt; + vend = vstart + phb->pci_io_size - 1; + if ((vaddr >= vstart) && (vaddr <= vend)) + return bus; + } + + if (paddr) + for (j = 0; j < 3; j++) { + res = &phb->mem_resources[j]; + if (paddr >= res->start && paddr <= res->end) + return bus; + } } + return NULL; } -static void spider_io_flush(const volatile void __iomem *addr) +struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) { - struct spider_pci_bus *bus; + struct iowa_bus *bus; int token; - /* Get platform token (set by ioremap) from address */ token = PCI_GET_ADDR_TOKEN(addr); - /* Fast path if we have a non-0 token, it indicates which bus we - * are on. - * - * If the token is 0, that means either that the ioremap was done - * before we initialized this layer, or it's a PIO operation. We - * fallback to a low path in this case. Hopefully, internal devices - * which are ioremap'ed early should use in_XX/out_XX functions - * instead of the PCI ones and thus not suffer from the slowdown. - * - * Also note that currently, the workaround will not work for areas - * that are not mapped with PTEs (bolted in the hash table). This - * is the case for ioremaps done very early at boot (before - * mem_init_done) and includes the mapping of the ISA IO space. - * - * Fortunately, none of the affected devices is expected to do DMA - * and thus there should be no problem in practice. - * - * In order to improve performances, we only do the PTE search for - * addresses falling in the PHB IO space area. That means it will - * not work for hotplug'ed PHBs but those don't exist with Spider. - */ - if (token && token <= spider_pci_count) - bus = &spider_pci_busses[token - 1]; + if (token && token <= iowa_bus_count) + bus = &iowa_busses[token - 1]; else { unsigned long vaddr, paddr; pte_t *ptep; - /* Fixup physical address */ vaddr = (unsigned long)PCI_FIX_ADDR(addr); + if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) + return NULL; - /* Check if it's in allowed range for PIO */ - if (vaddr < PHB_IO_BASE || vaddr > PHB_IO_END) - return; - - /* Try to find a PTE. If not, clear the paddr, we'll do - * a vaddr only lookup (PIO only) - */ ptep = find_linux_pte(init_mm.pgd, vaddr); if (ptep == NULL) paddr = 0; else paddr = pte_pfn(*ptep) << PAGE_SHIFT; + bus = iowa_pci_find(vaddr, paddr); - bus = spider_pci_find(vaddr, paddr); if (bus == NULL) - return; + return NULL; } - /* Now do the workaround - */ - (void)in_be32(bus->regs + SPIDER_PCI_DUMMY_READ); + return bus; } -static u8 spider_readb(const volatile void __iomem *addr) +struct iowa_bus *iowa_pio_find_bus(unsigned long port) { - u8 val = __do_readb(addr); - spider_io_flush(addr); - return val; + unsigned long vaddr = (unsigned long)pci_io_base + port; + return iowa_pci_find(vaddr, 0); } -static u16 spider_readw(const volatile void __iomem *addr) -{ - u16 val = __do_readw(addr); - spider_io_flush(addr); - return val; -} -static u32 spider_readl(const volatile void __iomem *addr) -{ - u32 val = __do_readl(addr); - spider_io_flush(addr); - return val; +#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ +static ret iowa_##name at \ +{ \ + struct iowa_bus *bus; \ + bus = iowa_##space##_find_bus(aa); \ + if (bus && bus->ops && bus->ops->name) \ + return bus->ops->name al; \ + return __do_##name al; \ } -static u64 spider_readq(const volatile void __iomem *addr) -{ - u64 val = __do_readq(addr); - spider_io_flush(addr); - return val; +#define DEF_PCI_AC_NORET(name, at, al, space, aa) \ +static void iowa_##name at \ +{ \ + struct iowa_bus *bus; \ + bus = iowa_##space##_find_bus(aa); \ + if (bus && bus->ops && bus->ops->name) { \ + bus->ops->name al; \ + return; \ + } \ + __do_##name al; \ } -static u16 spider_readw_be(const volatile void __iomem *addr) -{ - u16 val = __do_readw_be(addr); - spider_io_flush(addr); - return val; -} +#include <asm/io-defs.h> -static u32 spider_readl_be(const volatile void __iomem *addr) -{ - u32 val = __do_readl_be(addr); - spider_io_flush(addr); - return val; -} +#undef DEF_PCI_AC_RET +#undef DEF_PCI_AC_NORET -static u64 spider_readq_be(const volatile void __iomem *addr) -{ - u64 val = __do_readq_be(addr); - spider_io_flush(addr); - return val; -} +static struct ppc_pci_io __initdata iowa_pci_io = { -static void spider_readsb(const volatile void __iomem *addr, void *buf, - unsigned long count) -{ - __do_readsb(addr, buf, count); - spider_io_flush(addr); -} +#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name, +#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name, -static void spider_readsw(const volatile void __iomem *addr, void *buf, - unsigned long count) -{ - __do_readsw(addr, buf, count); - spider_io_flush(addr); -} +#include <asm/io-defs.h> -static void spider_readsl(const volatile void __iomem *addr, void *buf, - unsigned long count) -{ - __do_readsl(addr, buf, count); - spider_io_flush(addr); -} - -static void spider_memcpy_fromio(void *dest, const volatile void __iomem *src, - unsigned long n) -{ - __do_memcpy_fromio(dest, src, n); - spider_io_flush(src); -} +#undef DEF_PCI_AC_RET +#undef DEF_PCI_AC_NORET +}; -static void __iomem * spider_ioremap(unsigned long addr, unsigned long size, - unsigned long flags) +static void __iomem *iowa_ioremap(unsigned long addr, unsigned long size, + unsigned long flags) { - struct spider_pci_bus *bus; + struct iowa_bus *bus; void __iomem *res = __ioremap(addr, size, flags); int busno; - pr_debug("spider_ioremap(0x%lx, 0x%lx, 0x%lx) -> 0x%p\n", - addr, size, flags, res); - - bus = spider_pci_find(0, addr); + bus = iowa_pci_find(0, addr); if (bus != NULL) { - busno = bus - spider_pci_busses; - pr_debug(" found bus %d, setting token\n", busno); + busno = bus - iowa_busses; PCI_SET_ADDR_TOKEN(res, busno + 1); } - pr_debug(" result=0x%p\n", res); - return res; } -static void __init spider_pci_setup_chip(struct spider_pci_bus *bus) -{ -#ifdef SPIDER_DISABLE_PREFETCH - u32 val = in_be32(bus->regs + SPIDER_PCI_VCI_CNTL_STAT); - pr_debug(" PVCI_Control_Status was 0x%08x\n", val); - out_be32(bus->regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8); -#endif - - /* Configure the dummy address for the workaround */ - out_be32(bus->regs + SPIDER_PCI_DUMMY_READ_BASE, 0x80000000); -} - -static void __init spider_pci_add_one(struct pci_controller *phb) +/* Regist new bus to support workaround */ +void __init iowa_register_bus(struct pci_controller *phb, + struct ppc_pci_io *ops, + int (*initfunc)(struct iowa_bus *, void *), void *data) { - struct spider_pci_bus *bus = &spider_pci_busses[spider_pci_count]; + struct iowa_bus *bus; struct device_node *np = phb->dn; - struct resource rsrc; - void __iomem *regs; - if (spider_pci_count >= MAX_SPIDERS) { - printk(KERN_ERR "Too many spider bridges, workarounds" - " disabled for %s\n", np->full_name); + if (iowa_bus_count >= IOWA_MAX_BUS) { + pr_err("IOWA:Too many pci bridges, " + "workarounds disabled for %s\n", np->full_name); return; } - /* Get the registers for the beast */ - if (of_address_to_resource(np, 0, &rsrc)) { - printk(KERN_ERR "Failed to get registers for spider %s" - " workarounds disabled\n", np->full_name); - return; - } + bus = &iowa_busses[iowa_bus_count]; + bus->phb = phb; + bus->ops = ops; - /* Mask out some useless bits in there to get to the base of the - * spider chip - */ - rsrc.start &= ~0xfffffffful; - - /* Map them */ - regs = ioremap(rsrc.start + SPIDER_PCI_REG_BASE, 0x1000); - if (regs == NULL) { - printk(KERN_ERR "Failed to map registers for spider %s" - " workarounds disabled\n", np->full_name); - return; - } - - spider_pci_count++; - - /* We assume spiders only have one MMIO resource */ - bus->mmio_start = phb->mem_resources[0].start; - bus->mmio_end = phb->mem_resources[0].end + 1; - - bus->pio_vstart = (unsigned long)phb->io_base_virt; - bus->pio_vend = bus->pio_vstart + phb->pci_io_size; - - bus->regs = regs; - - printk(KERN_INFO "PCI: Spider MMIO workaround for %s\n",np->full_name); + if (initfunc) + if ((*initfunc)(bus, data)) + return; - pr_debug(" mmio (P) = 0x%016lx..0x%016lx\n", - bus->mmio_start, bus->mmio_end); - pr_debug(" pio (V) = 0x%016lx..0x%016lx\n", - bus->pio_vstart, bus->pio_vend); - pr_debug(" regs (P) = 0x%016lx (V) = 0x%p\n", - rsrc.start + SPIDER_PCI_REG_BASE, bus->regs); + iowa_bus_count++; - spider_pci_setup_chip(bus); + pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name); } -static struct ppc_pci_io __initdata spider_pci_io = { - .readb = spider_readb, - .readw = spider_readw, - .readl = spider_readl, - .readq = spider_readq, - .readw_be = spider_readw_be, - .readl_be = spider_readl_be, - .readq_be = spider_readq_be, - .readsb = spider_readsb, - .readsw = spider_readsw, - .readsl = spider_readsl, - .memcpy_fromio = spider_memcpy_fromio, -}; - -static int __init spider_pci_workaround_init(void) +/* enable IO workaround */ +void __init io_workaround_init(void) { - struct pci_controller *phb; - - /* Find spider bridges. We assume they have been all probed - * in setup_arch(). If that was to change, we would need to - * update this code to cope with dynamically added busses - */ - list_for_each_entry(phb, &hose_list, list_node) { - struct device_node *np = phb->dn; - const char *model = of_get_property(np, "model", NULL); - - /* If no model property or name isn't exactly "pci", skip */ - if (model == NULL || strcmp(np->name, "pci")) - continue; - /* If model is not "Spider", skip */ - if (strcmp(model, "Spider")) - continue; - spider_pci_add_one(phb); - } - - /* No Spider PCI found, exit */ - if (spider_pci_count == 0) - return 0; + static int io_workaround_inited; - /* Setup IO callbacks. We only setup MMIO reads. PIO reads will - * fallback to MMIO reads (though without a token, thus slower) - */ - ppc_pci_io = spider_pci_io; - - /* Setup ioremap callback */ - ppc_md.ioremap = spider_ioremap; - - return 0; + if (io_workaround_inited) + return; + ppc_pci_io = iowa_pci_io; + ppc_md.ioremap = iowa_ioremap; + io_workaround_inited = 1; } -machine_arch_initcall(cell, spider_pci_workaround_init); diff --git a/arch/powerpc/platforms/cell/io-workarounds.h b/arch/powerpc/platforms/cell/io-workarounds.h new file mode 100644 index 00000000000..79d8ed3d510 --- /dev/null +++ b/arch/powerpc/platforms/cell/io-workarounds.h @@ -0,0 +1,49 @@ +/* + * Support PCI IO workaround + * + * (C) Copyright 2007-2008 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _IO_WORKAROUNDS_H +#define _IO_WORKAROUNDS_H + +#include <linux/io.h> +#include <asm/pci-bridge.h> + +/* Bus info */ +struct iowa_bus { + struct pci_controller *phb; + struct ppc_pci_io *ops; + void *private; +}; + +void __init io_workaround_init(void); +void __init iowa_register_bus(struct pci_controller *, struct ppc_pci_io *, + int (*)(struct iowa_bus *, void *), void *); +struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR); +struct iowa_bus *iowa_pio_find_bus(unsigned long); + +extern struct ppc_pci_io spiderpci_ops; +extern int spiderpci_iowa_init(struct iowa_bus *, void *); + +#define SPIDER_PCI_REG_BASE 0xd000 +#define SPIDER_PCI_REG_SIZE 0x1000 +#define SPIDER_PCI_VCI_CNTL_STAT 0x0110 +#define SPIDER_PCI_DUMMY_READ 0x0810 +#define SPIDER_PCI_DUMMY_READ_BASE 0x0814 + +#endif /* _IO_WORKAROUNDS_H */ diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 5c531e8f9f6..ab721b50fbb 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -57,6 +57,7 @@ #include "interrupt.h" #include "pervasive.h" #include "ras.h" +#include "io-workarounds.h" #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -117,13 +118,50 @@ static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex); +static int __devinit cell_setup_phb(struct pci_controller *phb) +{ + const char *model; + struct device_node *np; + + int rc = rtas_setup_phb(phb); + if (rc) + return rc; + + np = phb->dn; + model = of_get_property(np, "model", NULL); + if (model == NULL || strcmp(np->name, "pci")) + return 0; + + /* Setup workarounds for spider */ + if (strcmp(model, "Spider")) + return 0; + + iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init, + (void *)SPIDER_PCI_REG_BASE); + io_workaround_init(); + + return 0; +} + static int __init cell_publish_devices(void) { + struct device_node *root = of_find_node_by_path("/"); + struct device_node *np; int node; /* Publish OF platform devices for southbridge IOs */ of_platform_bus_probe(NULL, NULL, NULL); + /* On spider based blades, we need to manually create the OF + * platform devices for the PCI host bridges + */ + for_each_child_of_node(root, np) { + if (np->type == NULL || (strcmp(np->type, "pci") != 0 && + strcmp(np->type, "pciex") != 0)) + continue; + of_platform_device_create(np, NULL, NULL); + } + /* There is no device for the MIC memory controller, thus we create * a platform device for it to attach the EDAC driver to. */ @@ -132,6 +170,7 @@ static int __init cell_publish_devices(void) continue; platform_device_register_simple("cbe-mic", node, NULL, 0); } + return 0; } machine_subsys_initcall(cell, cell_publish_devices); @@ -213,7 +252,7 @@ static void __init cell_setup_arch(void) /* Find and initialize PCI host bridges */ init_pci_config_tokens(); - find_and_init_phbs(); + cbe_pervasive_init(); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; @@ -249,7 +288,7 @@ define_machine(cell) { .calibrate_decr = generic_calibrate_decr, .progress = cell_progress, .init_IRQ = cell_init_irq, - .pci_setup_phb = rtas_setup_phb, + .pci_setup_phb = cell_setup_phb, #ifdef CONFIG_KEXEC .machine_kexec = default_machine_kexec, .machine_kexec_prepare = default_machine_kexec_prepare, diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c new file mode 100644 index 00000000000..418b605ac35 --- /dev/null +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -0,0 +1,184 @@ +/* + * IO workarounds for PCI on Celleb/Cell platform + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/of_platform.h> +#include <linux/io.h> + +#include <asm/ppc-pci.h> +#include <asm/pci-bridge.h> + +#include "io-workarounds.h" + +#define SPIDER_PCI_DISABLE_PREFETCH + +struct spiderpci_iowa_private { + void __iomem *regs; +}; + +static void spiderpci_io_flush(struct iowa_bus *bus) +{ + struct spiderpci_iowa_private *priv; + u32 val; + + priv = bus->private; + val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); + iosync(); +} + +#define SPIDER_PCI_MMIO_READ(name, ret) \ +static ret spiderpci_##name(const PCI_IO_ADDR addr) \ +{ \ + ret val = __do_##name(addr); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ + return val; \ +} + +#define SPIDER_PCI_MMIO_READ_STR(name) \ +static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, \ + unsigned long count) \ +{ \ + __do_##name(addr, buf, count); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ +} + +SPIDER_PCI_MMIO_READ(readb, u8) +SPIDER_PCI_MMIO_READ(readw, u16) +SPIDER_PCI_MMIO_READ(readl, u32) +SPIDER_PCI_MMIO_READ(readq, u64) +SPIDER_PCI_MMIO_READ(readw_be, u16) +SPIDER_PCI_MMIO_READ(readl_be, u32) +SPIDER_PCI_MMIO_READ(readq_be, u64) +SPIDER_PCI_MMIO_READ_STR(readsb) +SPIDER_PCI_MMIO_READ_STR(readsw) +SPIDER_PCI_MMIO_READ_STR(readsl) + +static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src, + unsigned long n) +{ + __do_memcpy_fromio(dest, src, n); + spiderpci_io_flush(iowa_mem_find_bus(src)); +} + +static int __init spiderpci_pci_setup_chip(struct pci_controller *phb, + void __iomem *regs) +{ + void *dummy_page_va; + dma_addr_t dummy_page_da; + +#ifdef SPIDER_PCI_DISABLE_PREFETCH + u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT); + pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val); + out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8); +#endif /* SPIDER_PCI_DISABLE_PREFETCH */ + + /* setup dummy read */ + /* + * On CellBlade, we can't know that which XDR memory is used by + * kmalloc() to allocate dummy_page_va. + * In order to imporve the performance, the XDR which is used to + * allocate dummy_page_va is the nearest the spider-pci. + * We have to select the CBE which is the nearest the spider-pci + * to allocate memory from the best XDR, but I don't know that + * how to do. + * + * Celleb does not have this problem, because it has only one XDR. + */ + dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!dummy_page_va) { + pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n"); + return -1; + } + + dummy_page_da = dma_map_single(phb->parent, dummy_page_va, + PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dummy_page_da)) { + pr_err("SPIDER-IOWA:Map dummy page filed.\n"); + kfree(dummy_page_va); + return -1; + } + + out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da); + + return 0; +} + +int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data) +{ + void __iomem *regs = NULL; + struct spiderpci_iowa_private *priv; + struct device_node *np = bus->phb->dn; + struct resource r; + unsigned long offset = (unsigned long)data; + + pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n", + np->full_name); + + priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL); + if (!priv) { + pr_err("SPIDERPCI-IOWA:" + "Can't allocate struct spiderpci_iowa_private"); + return -1; + } + + if (of_address_to_resource(np, 0, &r)) { + pr_err("SPIDERPCI-IOWA:Can't get resource.\n"); + goto error; + } + + regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE); + if (!regs) { + pr_err("SPIDERPCI-IOWA:ioremap failed.\n"); + goto error; + } + priv->regs = regs; + bus->private = priv; + + if (spiderpci_pci_setup_chip(bus->phb, regs)) + goto error; + + return 0; + +error: + kfree(priv); + bus->private = NULL; + + if (regs) + iounmap(regs); + + return -1; +} + +struct ppc_pci_io spiderpci_ops = { + .readb = spiderpci_readb, + .readw = spiderpci_readw, + .readl = spiderpci_readl, + .readq = spiderpci_readq, + .readw_be = spiderpci_readw_be, + .readl_be = spiderpci_readl_be, + .readq_be = spiderpci_readq_be, + .readsb = spiderpci_readsb, + .readsw = spiderpci_readsw, + .readsl = spiderpci_readsl, + .memcpy_fromio = spiderpci_memcpy_fromio, +}; + diff --git a/arch/powerpc/platforms/celleb/Kconfig b/arch/powerpc/platforms/celleb/Kconfig deleted file mode 100644 index 372891edcdd..00000000000 --- a/arch/powerpc/platforms/celleb/Kconfig +++ /dev/null @@ -1,12 +0,0 @@ -config PPC_CELLEB - bool "Toshiba's Cell Reference Set 'Celleb' Architecture" - depends on PPC_MULTIPLATFORM && PPC64 - select PPC_CELL - select PPC_CELL_NATIVE - select PPC_RTAS - select PPC_INDIRECT_IO - select PPC_OF_PLATFORM_PCI - select HAS_TXX9_SERIAL - select PPC_UDBG_BEAT - select USB_OHCI_BIG_ENDIAN_MMIO - select USB_EHCI_BIG_ENDIAN_MMIO diff --git a/arch/powerpc/platforms/celleb/Makefile b/arch/powerpc/platforms/celleb/Makefile deleted file mode 100644 index 889d43f715e..00000000000 --- a/arch/powerpc/platforms/celleb/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -obj-y += interrupt.o iommu.o setup.o \ - htab.o beat.o hvCall.o pci.o \ - scc_epci.o scc_uhc.o \ - io-workarounds.o - -obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_PPC_UDBG_BEAT) += udbg_beat.o -obj-$(CONFIG_SERIAL_TXX9) += scc_sio.o -obj-$(CONFIG_SPU_BASE) += spu_priv1.o diff --git a/arch/powerpc/platforms/celleb/io-workarounds.c b/arch/powerpc/platforms/celleb/io-workarounds.c deleted file mode 100644 index 423339be1ba..00000000000 --- a/arch/powerpc/platforms/celleb/io-workarounds.c +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Support for Celleb io workarounds - * - * (C) Copyright 2006-2007 TOSHIBA CORPORATION - * - * This file is based to arch/powerpc/platform/cell/io-workarounds.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#undef DEBUG - -#include <linux/of.h> -#include <linux/of_device.h> -#include <linux/irq.h> - -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/machdep.h> -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> - -#include "pci.h" - -#define MAX_CELLEB_PCI_BUS 4 - -void *celleb_dummy_page_va; - -static struct celleb_pci_bus { - struct pci_controller *phb; - void (*dummy_read)(struct pci_controller *); -} celleb_pci_busses[MAX_CELLEB_PCI_BUS]; - -static int celleb_pci_count = 0; - -static struct celleb_pci_bus *celleb_pci_find(unsigned long vaddr, - unsigned long paddr) -{ - int i, j; - struct resource *res; - - for (i = 0; i < celleb_pci_count; i++) { - struct celleb_pci_bus *bus = &celleb_pci_busses[i]; - struct pci_controller *phb = bus->phb; - if (paddr) - for (j = 0; j < 3; j++) { - res = &phb->mem_resources[j]; - if (paddr >= res->start && paddr <= res->end) - return bus; - } - res = &phb->io_resource; - if (vaddr && vaddr >= res->start && vaddr <= res->end) - return bus; - } - return NULL; -} - -static void celleb_io_flush(const PCI_IO_ADDR addr) -{ - struct celleb_pci_bus *bus; - int token; - - token = PCI_GET_ADDR_TOKEN(addr); - - if (token && token <= celleb_pci_count) - bus = &celleb_pci_busses[token - 1]; - else { - unsigned long vaddr, paddr; - pte_t *ptep; - - vaddr = (unsigned long)PCI_FIX_ADDR(addr); - if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) - return; - - ptep = find_linux_pte(init_mm.pgd, vaddr); - if (ptep == NULL) - paddr = 0; - else - paddr = pte_pfn(*ptep) << PAGE_SHIFT; - bus = celleb_pci_find(vaddr, paddr); - - if (bus == NULL) - return; - } - - if (bus->dummy_read) - bus->dummy_read(bus->phb); -} - -static u8 celleb_readb(const PCI_IO_ADDR addr) -{ - u8 val; - val = __do_readb(addr); - celleb_io_flush(addr); - return val; -} - -static u16 celleb_readw(const PCI_IO_ADDR addr) -{ - u16 val; - val = __do_readw(addr); - celleb_io_flush(addr); - return val; -} - -static u32 celleb_readl(const PCI_IO_ADDR addr) -{ - u32 val; - val = __do_readl(addr); - celleb_io_flush(addr); - return val; -} - -static u64 celleb_readq(const PCI_IO_ADDR addr) -{ - u64 val; - val = __do_readq(addr); - celleb_io_flush(addr); - return val; -} - -static u16 celleb_readw_be(const PCI_IO_ADDR addr) -{ - u16 val; - val = __do_readw_be(addr); - celleb_io_flush(addr); - return val; -} - -static u32 celleb_readl_be(const PCI_IO_ADDR addr) -{ - u32 val; - val = __do_readl_be(addr); - celleb_io_flush(addr); - return val; -} - -static u64 celleb_readq_be(const PCI_IO_ADDR addr) -{ - u64 val; - val = __do_readq_be(addr); - celleb_io_flush(addr); - return val; -} - -static void celleb_readsb(const PCI_IO_ADDR addr, - void *buf, unsigned long count) -{ - __do_readsb(addr, buf, count); - celleb_io_flush(addr); -} - -static void celleb_readsw(const PCI_IO_ADDR addr, - void *buf, unsigned long count) -{ - __do_readsw(addr, buf, count); - celleb_io_flush(addr); -} - -static void celleb_readsl(const PCI_IO_ADDR addr, - void *buf, unsigned long count) -{ - __do_readsl(addr, buf, count); - celleb_io_flush(addr); -} - -static void celleb_memcpy_fromio(void *dest, - const PCI_IO_ADDR src, - unsigned long n) -{ - __do_memcpy_fromio(dest, src, n); - celleb_io_flush(src); -} - -static void __iomem *celleb_ioremap(unsigned long addr, - unsigned long size, - unsigned long flags) -{ - struct celleb_pci_bus *bus; - void __iomem *res = __ioremap(addr, size, flags); - int busno; - - bus = celleb_pci_find(0, addr); - if (bus != NULL) { - busno = bus - celleb_pci_busses; - PCI_SET_ADDR_TOKEN(res, busno + 1); - } - return res; -} - -static void celleb_iounmap(volatile void __iomem *addr) -{ - return __iounmap(PCI_FIX_ADDR(addr)); -} - -static struct ppc_pci_io celleb_pci_io __initdata = { - .readb = celleb_readb, - .readw = celleb_readw, - .readl = celleb_readl, - .readq = celleb_readq, - .readw_be = celleb_readw_be, - .readl_be = celleb_readl_be, - .readq_be = celleb_readq_be, - .readsb = celleb_readsb, - .readsw = celleb_readsw, - .readsl = celleb_readsl, - .memcpy_fromio = celleb_memcpy_fromio, -}; - -void __init celleb_pci_add_one(struct pci_controller *phb, - void (*dummy_read)(struct pci_controller *)) -{ - struct celleb_pci_bus *bus = &celleb_pci_busses[celleb_pci_count]; - struct device_node *np = phb->dn; - - if (celleb_pci_count >= MAX_CELLEB_PCI_BUS) { - printk(KERN_ERR "Too many pci bridges, workarounds" - " disabled for %s\n", np->full_name); - return; - } - - celleb_pci_count++; - - bus->phb = phb; - bus->dummy_read = dummy_read; -} - -static struct of_device_id celleb_pci_workaround_match[] __initdata = { - { - .name = "pci-pseudo", - .data = fake_pci_workaround_init, - }, { - .name = "epci", - .data = epci_workaround_init, - }, { - }, -}; - -int __init celleb_pci_workaround_init(void) -{ - struct pci_controller *phb; - struct device_node *node; - const struct of_device_id *match; - void (*init_func)(struct pci_controller *); - - celleb_dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!celleb_dummy_page_va) { - printk(KERN_ERR "Celleb: dummy read disabled. " - "Alloc celleb_dummy_page_va failed\n"); - return 1; - } - - list_for_each_entry(phb, &hose_list, list_node) { - node = phb->dn; - match = of_match_node(celleb_pci_workaround_match, node); - - if (match) { - init_func = match->data; - (*init_func)(phb); - } - } - - ppc_pci_io = celleb_pci_io; - ppc_md.ioremap = celleb_ioremap; - ppc_md.iounmap = celleb_iounmap; - - return 0; -} diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S index c775cd4b3d6..8ff330d026c 100644 --- a/arch/powerpc/platforms/iseries/exception.S +++ b/arch/powerpc/platforms/iseries/exception.S @@ -59,8 +59,33 @@ system_reset_iSeries: andc r4,r4,r5 mtspr SPRN_CTRLT,r4 +/* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */ +/* In the UP case we'll yeild() later, and we will not access the paca anyway */ +#ifdef CONFIG_SMP 1: HMT_LOW + LOAD_REG_IMMEDIATE(r23, __secondary_hold_spinloop) + ld r23,0(r23) + sync + LOAD_REG_IMMEDIATE(r3,current_set) + sldi r28,r24,3 /* get current_set[cpu#] */ + ldx r3,r3,r28 + addi r1,r3,THREAD_SIZE + subi r1,r1,STACK_FRAME_OVERHEAD + + cmpwi 0,r23,0 /* Keep poking the Hypervisor until */ + bne 2f /* we're released */ + /* Let the Hypervisor know we are alive */ + /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ + lis r3,0x8002 + rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ + li r0,-1 /* r0=-1 indicates a Hypervisor call */ + sc /* Invoke the hypervisor via a system call */ + b 1b +#endif + +2: + HMT_LOW #ifdef CONFIG_SMP lbz r23,PACAPROCSTART(r13) /* Test if this processor * should start */ @@ -91,7 +116,7 @@ iSeries_secondary_smp_loop: li r0,-1 /* r0=-1 indicates a Hypervisor call */ sc /* Invoke the hypervisor via a system call */ mfspr r13,SPRN_SPRG3 /* Put r13 back ???? */ - b 1b /* If SMP not configured, secondaries + b 2b /* If SMP not configured, secondaries * loop forever */ /*** ISeries-LPAR interrupt handlers ***/ diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index c73379ec914..1d201782d4e 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -25,6 +25,7 @@ #include <linux/syscalls.h> #include <linux/ctype.h> #include <linux/lmb.h> +#include <linux/of.h> #include <asm/prom.h> diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 306a9d07491..07fe5b69b9e 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -34,3 +34,8 @@ config LPARCFG help Provide system capacity information via human readable <key word>=<value> pairs through a /proc/ppc64/lparcfg interface. + +config PPC_PSERIES_DEBUG + depends on PPC_PSERIES && PPC_EARLY_DEBUG + bool "Enable extra debug logging in platforms/pseries" + default y diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index bdae04bb7a0..bd2593ed28d 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -2,6 +2,10 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc endif +ifeq ($(CONFIG_PPC_PSERIES_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif + obj-y := lpar.o hvCall.o nvram.o reconfig.o \ setup.o iommu.o ras.o rtasd.o \ firmware.o power.o diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 550b2f7d2cc..a3fd56b186e 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -39,7 +39,6 @@ #include <asm/ppc-pci.h> #include <asm/rtas.h> -#undef DEBUG /** Overview: * EEH, or "Extended Error Handling" is a PCI bridge technology for diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index 1e83fcd0df3..ce37040af87 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -28,7 +28,6 @@ #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> -#undef DEBUG /** * The pci address cache subsystem. This subsystem places diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index b765b7c77b6..9d3a40f4597 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -21,17 +21,11 @@ * 2 of the License, or (at your option) any later version. */ -#undef DEBUG #include <asm/firmware.h> #include <asm/prom.h> #include <asm/udbg.h> -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif typedef struct { unsigned long val; @@ -72,7 +66,7 @@ void __init fw_feature_init(const char *hypertas, unsigned long len) const char *s; int i; - DBG(" -> fw_feature_init()\n"); + pr_debug(" -> fw_feature_init()\n"); for (s = hypertas; s < hypertas + len; s += strlen(s) + 1) { for (i = 0; i < FIRMWARE_MAX_FEATURES; i++) { @@ -88,5 +82,5 @@ void __init fw_feature_init(const char *hypertas, unsigned long len) } } - DBG(" <- fw_feature_init()\n"); + pr_debug(" <- fw_feature_init()\n"); } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index a65c7630820..176f1f39d2d 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -47,7 +47,6 @@ #include "plpar_wrappers.h" -#define DBG(fmt...) static void tce_build_pSeries(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, @@ -322,7 +321,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) dn = pci_bus_to_OF_node(bus); - DBG("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name); + pr_debug("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name); if (bus->self) { /* This is not a root bus, any setup will be done for the @@ -347,7 +346,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) children++; - DBG("Children: %d\n", children); + pr_debug("Children: %d\n", children); /* Calculate amount of DMA window per slot. Each window must be * a power of two (due to pci_alloc_consistent requirements). @@ -361,8 +360,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) while (pci->phb->dma_window_size * children > 0x80000000ul) pci->phb->dma_window_size >>= 1; - DBG("No ISA/IDE, window size is 0x%lx\n", - pci->phb->dma_window_size); + pr_debug("No ISA/IDE, window size is 0x%lx\n", + pci->phb->dma_window_size); pci->phb->dma_window_base_cur = 0; return; @@ -387,8 +386,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) while (pci->phb->dma_window_size * children > 0x70000000ul) pci->phb->dma_window_size >>= 1; - DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size); - + pr_debug("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size); } @@ -401,7 +399,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) dn = pci_bus_to_OF_node(bus); - DBG("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n", dn->full_name); + pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n", + dn->full_name); /* Find nearest ibm,dma-window, walking up the device tree */ for (pdn = dn; pdn != NULL; pdn = pdn->parent) { @@ -411,14 +410,14 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) } if (dma_window == NULL) { - DBG(" no ibm,dma-window property !\n"); + pr_debug(" no ibm,dma-window property !\n"); return; } ppci = PCI_DN(pdn); - DBG(" parent is %s, iommu_table: 0x%p\n", - pdn->full_name, ppci->iommu_table); + pr_debug(" parent is %s, iommu_table: 0x%p\n", + pdn->full_name, ppci->iommu_table); if (!ppci->iommu_table) { tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL, @@ -426,7 +425,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window, bus->number); ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); - DBG(" created table: %p\n", ppci->iommu_table); + pr_debug(" created table: %p\n", ppci->iommu_table); } if (pdn != dn) @@ -439,7 +438,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) struct device_node *dn; struct iommu_table *tbl; - DBG("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); + pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); dn = dev->dev.archdata.of_node; @@ -450,7 +449,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) if (!dev->bus->self) { struct pci_controller *phb = PCI_DN(dn)->phb; - DBG(" --> first child, no bridge. Allocating iommu table.\n"); + pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL, phb->node); iommu_table_setparms(phb, dn, tbl); @@ -480,7 +479,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) const void *dma_window = NULL; struct pci_dn *pci; - DBG("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); + pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); /* dev setup for LPAR is a little tricky, since the device tree might * contain the dma-window properties per-device and not neccesarily @@ -489,7 +488,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) * already allocated. */ dn = pci_device_to_OF_node(dev); - DBG(" node is %s\n", dn->full_name); + pr_debug(" node is %s\n", dn->full_name); for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; pdn = pdn->parent) { @@ -504,13 +503,13 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pci_name(dev), dn? dn->full_name : "<null>"); return; } - DBG(" parent is %s\n", pdn->full_name); + pr_debug(" parent is %s\n", pdn->full_name); /* Check for parent == NULL so we don't try to setup the empty EADS * slots on POWER4 machines. */ if (dma_window == NULL || pdn->parent == NULL) { - DBG(" no dma window for device, linking to parent\n"); + pr_debug(" no dma window for device, linking to parent\n"); dev->dev.archdata.dma_data = PCI_DN(pdn)->iommu_table; return; } @@ -522,9 +521,9 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window, pci->phb->bus->number); pci->iommu_table = iommu_init_table(tbl, pci->phb->node); - DBG(" created table: %p\n", pci->iommu_table); + pr_debug(" created table: %p\n", pci->iommu_table); } else { - DBG(" found DMA window, table: %p\n", pci->iommu_table); + pr_debug(" found DMA window, table: %p\n", pci->iommu_table); } dev->dev.archdata.dma_data = pci->iommu_table; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 9235c469449..2cbaedb17f3 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -19,7 +19,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#undef DEBUG_LOW +/* Enables debugging of low-level hash table routines - careful! */ +#undef DEBUG #include <linux/kernel.h> #include <linux/dma-mapping.h> @@ -42,11 +43,6 @@ #include "plpar_wrappers.h" #include "pseries.h" -#ifdef DEBUG_LOW -#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while(0) -#else -#define DBG_LOW(fmt...) do { } while(0) -#endif /* in hvCall.S */ EXPORT_SYMBOL(plpar_hcall); @@ -196,6 +192,8 @@ void __init udbg_init_debug_lpar(void) udbg_putc = udbg_putcLP; udbg_getc = udbg_getcLP; udbg_getc_poll = udbg_getc_pollLP; + + register_early_udbg_console(); } /* returns 0 if couldn't find or use /chosen/stdout as console */ @@ -288,15 +286,15 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long hpte_v, hpte_r; if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " - "rflags=%lx, vflags=%lx, psize=%d)\n", - hpte_group, va, pa, rflags, vflags, psize); + pr_debug("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " + "rflags=%lx, vflags=%lx, psize=%d)\n", + hpte_group, va, pa, rflags, vflags, psize); hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID; hpte_r = hpte_encode_r(pa, psize) | rflags; if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); + pr_debug(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); /* Now fill in the actual HPTE */ /* Set CEC cookie to 0 */ @@ -313,7 +311,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); if (unlikely(lpar_rc == H_PTEG_FULL)) { if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" full\n"); + pr_debug(" full\n"); return -1; } @@ -324,11 +322,11 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, */ if (unlikely(lpar_rc != H_SUCCESS)) { if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" lpar err %d\n", lpar_rc); + pr_debug(" lpar err %lu\n", lpar_rc); return -2; } if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" -> slot: %d\n", slot & 7); + pr_debug(" -> slot: %lu\n", slot & 7); /* Because of iSeries, we have to pass down the secondary * bucket bit here as well @@ -420,17 +418,17 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, want_v = hpte_encode_avpn(va, psize, ssize); - DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... ", - want_v, slot, flags, psize); + pr_debug(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", + want_v, slot, flags, psize); lpar_rc = plpar_pte_protect(flags, slot, want_v); if (lpar_rc == H_NOT_FOUND) { - DBG_LOW("not found !\n"); + pr_debug("not found !\n"); return -1; } - DBG_LOW("ok\n"); + pr_debug("ok\n"); BUG_ON(lpar_rc != H_SUCCESS); @@ -505,8 +503,8 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, unsigned long lpar_rc; unsigned long dummy1, dummy2; - DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d", - slot, va, psize, local); + pr_debug(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n", + slot, va, psize, local); want_v = hpte_encode_avpn(va, psize, ssize); lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index a1ab25c7082..2b548afd100 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -67,8 +67,6 @@ static int ras_check_exception_token; static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); static irqreturn_t ras_error_interrupt(int irq, void *dev_id); -/* #define DEBUG */ - static void request_ras_irqs(struct device_node *np, irq_handler_t handler, @@ -237,7 +235,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n", *((unsigned long *)&ras_log_buf), status); -#ifndef DEBUG +#ifndef DEBUG_RTAS_POWER_OFF /* Don't actually power off when debugging so we can test * without actually failing while injecting errors. * Error data will not be logged to syslog. diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index e3078ce4151..befadd4f952 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -29,11 +29,6 @@ #include <asm/atomic.h> #include <asm/machdep.h> -#if 0 -#define DEBUG(A...) printk(KERN_ERR A) -#else -#define DEBUG(A...) -#endif static DEFINE_SPINLOCK(rtasd_log_lock); @@ -198,7 +193,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) unsigned long s; int len = 0; - DEBUG("logging event\n"); + pr_debug("rtasd: logging event\n"); if (buf == NULL) return; @@ -409,7 +404,8 @@ static int rtasd(void *unused) daemonize("rtasd"); printk(KERN_DEBUG "RTAS daemon started\n"); - DEBUG("will sleep for %d milliseconds\n", (30000/rtas_event_scan_rate)); + pr_debug("rtasd: will sleep for %d milliseconds\n", + (30000 / rtas_event_scan_rate)); /* See if we have any error stored in NVRAM */ memset(logdata, 0, rtas_error_log_max); @@ -428,9 +424,9 @@ static int rtasd(void *unused) do_event_scan_all_cpus(1000); if (surveillance_timeout != -1) { - DEBUG("enabling surveillance\n"); + pr_debug("rtasd: enabling surveillance\n"); enable_surveillance(surveillance_timeout); - DEBUG("surveillance enabled\n"); + pr_debug("rtasd: surveillance enabled\n"); } /* Delay should be at least one second since some diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index e5b0ea87016..bec3803f061 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -38,9 +38,7 @@ #define SCANLOG_HWERROR -1 #define SCANLOG_CONTINUE 1 -#define DEBUG(A...) do { if (scanlog_debug) printk(KERN_ERR "scanlog: " A); } while (0) -static int scanlog_debug; static unsigned int ibm_scan_log_dump; /* RTAS token */ static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */ @@ -86,14 +84,14 @@ static ssize_t scanlog_read(struct file *file, char __user *buf, memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE); spin_unlock(&rtas_data_buf_lock); - DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n", - status, data[0], data[1], data[2]); + pr_debug("scanlog: status=%d, data[0]=%x, data[1]=%x, " \ + "data[2]=%x\n", status, data[0], data[1], data[2]); switch (status) { case SCANLOG_COMPLETE: - DEBUG("hit eof\n"); + pr_debug("scanlog: hit eof\n"); return 0; case SCANLOG_HWERROR: - DEBUG("hardware error reading scan log data\n"); + pr_debug("scanlog: hardware error reading data\n"); return -EIO; case SCANLOG_CONTINUE: /* We may or may not have data yet */ @@ -110,7 +108,8 @@ static ssize_t scanlog_read(struct file *file, char __user *buf, /* Assume extended busy */ wait_time = rtas_busy_delay_time(status); if (!wait_time) { - printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status); + printk(KERN_ERR "scanlog: unknown error " \ + "from rtas: %d\n", status); return -EIO; } } @@ -134,15 +133,9 @@ static ssize_t scanlog_write(struct file * file, const char __user * buf, if (buf) { if (strncmp(stkbuf, "reset", 5) == 0) { - DEBUG("reset scanlog\n"); + pr_debug("scanlog: reset scanlog\n"); status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0); - DEBUG("rtas returns %d\n", status); - } else if (strncmp(stkbuf, "debugon", 7) == 0) { - printk(KERN_ERR "scanlog: debug on\n"); - scanlog_debug = 1; - } else if (strncmp(stkbuf, "debugoff", 8) == 0) { - printk(KERN_ERR "scanlog: debug off\n"); - scanlog_debug = 0; + pr_debug("scanlog: rtas returns %d\n", status); } } return count; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index f66aa9c3b13..f5d29f5b13c 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -16,8 +16,6 @@ * bootup setup stuff.. */ -#undef DEBUG - #include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> @@ -70,11 +68,6 @@ #include "plpar_wrappers.h" #include "pseries.h" -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif int fwnmi_active; /* TRUE if an FWNMI handler is present */ @@ -326,7 +319,7 @@ static int pseries_set_xdabr(unsigned long dabr) */ static void __init pSeries_init_early(void) { - DBG(" -> pSeries_init_early()\n"); + pr_debug(" -> pSeries_init_early()\n"); if (firmware_has_feature(FW_FEATURE_LPAR)) find_udbg_vterm(); @@ -338,7 +331,7 @@ static void __init pSeries_init_early(void) iommu_init_early_pSeries(); - DBG(" <- pSeries_init_early()\n"); + pr_debug(" <- pSeries_init_early()\n"); } /* @@ -383,7 +376,7 @@ static int __init pSeries_probe(void) of_flat_dt_is_compatible(root, "IBM,CBEA")) return 0; - DBG("pSeries detected, looking for LPAR capability...\n"); + pr_debug("pSeries detected, looking for LPAR capability...\n"); /* Now try to figure out if we are running on LPAR */ of_scan_flat_dt(pSeries_probe_hypertas, NULL); @@ -393,8 +386,8 @@ static int __init pSeries_probe(void) else hpte_init_native(); - DBG("Machine is%s LPAR !\n", - (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); + pr_debug("Machine is%s LPAR !\n", + (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); return 1; } diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index ea4c65917a6..9d8f8c84ab8 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#undef DEBUG #include <linux/kernel.h> #include <linux/module.h> @@ -51,12 +50,6 @@ #include "plpar_wrappers.h" #include "pseries.h" -#ifdef DEBUG -#include <asm/udbg.h> -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif /* * The primary thread of each non-boot processor is recorded here before @@ -231,7 +224,7 @@ static void __init smp_init_pseries(void) { int i; - DBG(" -> smp_init_pSeries()\n"); + pr_debug(" -> smp_init_pSeries()\n"); /* Mark threads which are still spinning in hold loops. */ if (cpu_has_feature(CPU_FTR_SMT)) { @@ -255,7 +248,7 @@ static void __init smp_init_pseries(void) smp_ops->take_timebase = pSeries_take_timebase; } - DBG(" <- smp_init_pSeries()\n"); + pr_debug(" <- smp_init_pSeries()\n"); } #ifdef CONFIG_MPIC diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 43df53c30aa..ebebc28fe89 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -9,7 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#undef DEBUG #include <linux/types.h> #include <linux/threads.h> diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index d359d6e9297..7f59188cd9a 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -143,7 +143,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) */ static int axon_ram_direct_access(struct block_device *device, sector_t sector, - unsigned long *data) + void **kaddr, unsigned long *pfn) { struct axon_ram_bank *bank = device->bd_disk->private_data; loff_t offset; @@ -154,7 +154,8 @@ axon_ram_direct_access(struct block_device *device, sector_t sector, return -ERANGE; } - *data = bank->ph_addr + offset; + *kaddr = (void *)(bank->ph_addr + offset); + *pfn = virt_to_phys(kaddr) >> PAGE_SHIFT; return 0; } diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 5c1b246aacc..7b45670c7af 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -892,3 +892,44 @@ void fsl_rstcr_restart(char *cmd) while (1) ; } #endif + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) +struct platform_diu_data_ops diu_ops = { + .diu_size = 1280 * 1024 * 4, /* default one 1280x1024 buffer */ +}; +EXPORT_SYMBOL(diu_ops); + +int __init preallocate_diu_videomemory(void) +{ + pr_debug("diu_size=%lu\n", diu_ops.diu_size); + + diu_ops.diu_mem = __alloc_bootmem(diu_ops.diu_size, 8, 0); + if (!diu_ops.diu_mem) { + printk(KERN_ERR "fsl-diu: cannot allocate %lu bytes\n", + diu_ops.diu_size); + return -ENOMEM; + } + + pr_debug("diu_mem=%p\n", diu_ops.diu_mem); + + rh_init(&diu_ops.diu_rh_info, 4096, ARRAY_SIZE(diu_ops.diu_rh_block), + diu_ops.diu_rh_block); + return rh_attach_region(&diu_ops.diu_rh_info, + (unsigned long) diu_ops.diu_mem, + diu_ops.diu_size); +} + +static int __init early_parse_diufb(char *p) +{ + if (!p) + return 1; + + diu_ops.diu_size = _ALIGN_UP(memparse(p, &p), 8); + + pr_debug("diu_size=%lu\n", diu_ops.diu_size); + + return 0; +} +early_param("diufb", early_parse_diufb); + +#endif diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h index 74c4a9657b3..52c831fa188 100644 --- a/arch/powerpc/sysdev/fsl_soc.h +++ b/arch/powerpc/sysdev/fsl_soc.h @@ -17,5 +17,28 @@ extern int fsl_spi_init(struct spi_board_info *board_infos, void (*deactivate_cs)(u8 cs, u8 polarity)); extern void fsl_rstcr_restart(char *cmd); + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) +#include <linux/bootmem.h> +#include <asm/rheap.h> +struct platform_diu_data_ops { + rh_block_t diu_rh_block[16]; + rh_info_t diu_rh_info; + unsigned long diu_size; + void *diu_mem; + + unsigned int (*get_pixel_format) (unsigned int bits_per_pixel, + int monitor_port); + void (*set_gamma_table) (int monitor_port, char *gamma_table_base); + void (*set_monitor_port) (int monitor_port); + void (*set_pixel_clock) (unsigned int pixclock); + ssize_t (*show_monitor_port) (int monitor_port, char *buf); + int (*set_sysfs_monitor_port) (int val); +}; + +extern struct platform_diu_data_ops diu_ops; +int __init preallocate_diu_videomemory(void); +#endif + #endif #endif diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c index 047b31027fa..41af1223e2a 100644 --- a/arch/powerpc/sysdev/mv64x60_dev.c +++ b/arch/powerpc/sysdev/mv64x60_dev.c @@ -338,15 +338,13 @@ static int __init mv64x60_i2c_device_setup(struct device_node *np, int id) pdata.freq_m = 8; /* default */ prop = of_get_property(np, "freq_m", NULL); - if (!prop) - return -ENODEV; - pdata.freq_m = *prop; + if (prop) + pdata.freq_m = *prop; pdata.freq_m = 3; /* default */ prop = of_get_property(np, "freq_n", NULL); - if (!prop) - return -ENODEV; - pdata.freq_n = *prop; + if (prop) + pdata.freq_n = *prop; pdata.timeout = 1000; /* default: 1 second */ @@ -433,9 +431,13 @@ static int __init mv64x60_device_setup(void) int err; id = 0; - for_each_compatible_node(np, "serial", "marvell,mv64360-mpsc") - if ((err = mv64x60_mpsc_device_setup(np, id++))) - goto error; + for_each_compatible_node(np, "serial", "marvell,mv64360-mpsc") { + err = mv64x60_mpsc_device_setup(np, id++); + if (err) + printk(KERN_ERR "Failed to initialize MV64x60 " + "serial device %s: error %d.\n", + np->full_name, err); + } id = 0; id2 = 0; @@ -443,38 +445,44 @@ static int __init mv64x60_device_setup(void) pdev = mv64x60_eth_register_shared_pdev(np, id++); if (IS_ERR(pdev)) { err = PTR_ERR(pdev); - goto error; + printk(KERN_ERR "Failed to initialize MV64x60 " + "network block %s: error %d.\n", + np->full_name, err); + continue; } for_each_child_of_node(np, np2) { if (!of_device_is_compatible(np2, "marvell,mv64360-eth")) continue; err = mv64x60_eth_device_setup(np2, id2++, pdev); - if (err) { - of_node_put(np2); - goto error; - } + if (err) + printk(KERN_ERR "Failed to initialize " + "MV64x60 network device %s: " + "error %d.\n", + np2->full_name, err); } } id = 0; - for_each_compatible_node(np, "i2c", "marvell,mv64360-i2c") - if ((err = mv64x60_i2c_device_setup(np, id++))) - goto error; + for_each_compatible_node(np, "i2c", "marvell,mv64360-i2c") { + err = mv64x60_i2c_device_setup(np, id++); + if (err) + printk(KERN_ERR "Failed to initialize MV64x60 I2C " + "bus %s: error %d.\n", + np->full_name, err); + } /* support up to one watchdog timer */ np = of_find_compatible_node(np, NULL, "marvell,mv64360-wdt"); if (np) { if ((err = mv64x60_wdt_device_setup(np, id))) - goto error; + printk(KERN_ERR "Failed to initialize MV64x60 " + "Watchdog %s: error %d.\n", + np->full_name, err); of_node_put(np); } return 0; - -error: - of_node_put(np); - return err; } arch_initcall(mv64x60_device_setup); diff --git a/arch/powerpc/sysdev/mv64x60_udbg.c b/arch/powerpc/sysdev/mv64x60_udbg.c index ccdb3b0418f..2792dc8b038 100644 --- a/arch/powerpc/sysdev/mv64x60_udbg.c +++ b/arch/powerpc/sysdev/mv64x60_udbg.c @@ -94,7 +94,7 @@ static void mv64x60_udbg_init(void) if (!np) return; - block_index = of_get_property(np, "block-index", NULL); + block_index = of_get_property(np, "cell-index", NULL); if (!block_index) goto error; diff --git a/arch/ppc/8260_io/fcc_enet.c b/arch/ppc/8260_io/fcc_enet.c index bcc3aa9d04f..d38b57e24ce 100644 --- a/arch/ppc/8260_io/fcc_enet.c +++ b/arch/ppc/8260_io/fcc_enet.c @@ -165,9 +165,6 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr); #ifdef CONFIG_SBC82xx #define F1_RXCLK 9 #define F1_TXCLK 10 -#elif defined(CONFIG_ADS8272) -#define F1_RXCLK 11 -#define F1_TXCLK 10 #else #define F1_RXCLK 12 #define F1_TXCLK 11 @@ -175,13 +172,8 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr); /* FCC2 Clock Source Configuration. There are board specific. Can only choose from CLK13-16 */ -#ifdef CONFIG_ADS8272 -#define F2_RXCLK 15 -#define F2_TXCLK 16 -#else #define F2_RXCLK 13 #define F2_TXCLK 14 -#endif /* FCC3 Clock Source Configuration. There are board specific. Can only choose from CLK13-16 */ @@ -289,10 +281,7 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr); /* TQM8260 has MDIO and MDCK on PC30 and PC31 respectively */ #define PC_MDIO ((uint)0x00000002) #define PC_MDCK ((uint)0x00000001) -#elif defined(CONFIG_ADS8272) -#define PC_MDIO ((uint)0x00002000) -#define PC_MDCK ((uint)0x00001000) -#elif defined(CONFIG_EST8260) || defined(CONFIG_ADS8260) || defined(CONFIG_PQ2FADS) +#elif defined(CONFIG_EST8260) || defined(CONFIG_ADS8260) #define PC_MDIO ((uint)0x00400000) #define PC_MDCK ((uint)0x00200000) #else @@ -2118,11 +2107,6 @@ init_fcc_startup(fcc_info_t *fip, struct net_device *dev) printk("Can't get FCC IRQ %d\n", fip->fc_interrupt); #ifdef PHY_INTERRUPT -#ifdef CONFIG_ADS8272 - if (request_irq(PHY_INTERRUPT, mii_link_interrupt, IRQF_SHARED, - "mii", dev) < 0) - printk(KERN_CRIT "Can't get MII IRQ %d\n", PHY_INTERRUPT); -#else /* Make IRQn edge triggered. This does not work if PHY_INTERRUPT is * on Port C. */ @@ -2132,7 +2116,6 @@ init_fcc_startup(fcc_info_t *fip, struct net_device *dev) if (request_irq(PHY_INTERRUPT, mii_link_interrupt, 0, "mii", dev) < 0) printk(KERN_CRIT "Can't get MII IRQ %d\n", PHY_INTERRUPT); -#endif #endif /* PHY_INTERRUPT */ /* Set GFMR to enable Ethernet operating mode. diff --git a/arch/ppc/8xx_io/enet.c b/arch/ppc/8xx_io/enet.c index c6d047ae77a..5899aea1644 100644 --- a/arch/ppc/8xx_io/enet.c +++ b/arch/ppc/8xx_io/enet.c @@ -946,29 +946,6 @@ static int __init scc_enet_init(void) *((volatile uint *)BCSR1) &= ~BCSR1_ETHEN; #endif -#ifdef CONFIG_MPC885ADS - - /* Deassert PHY reset and enable the PHY. - */ - { - volatile uint __iomem *bcsr = ioremap(BCSR_ADDR, BCSR_SIZE); - uint tmp; - - tmp = in_be32(bcsr + 1 /* BCSR1 */); - tmp |= BCSR1_ETHEN; - out_be32(bcsr + 1, tmp); - tmp = in_be32(bcsr + 4 /* BCSR4 */); - tmp |= BCSR4_ETH10_RST; - out_be32(bcsr + 4, tmp); - iounmap(bcsr); - } - - /* On MPC885ADS SCC ethernet PHY defaults to the full duplex mode - * upon reset. SCC is set to half duplex by default. So this - * inconsistency should be better fixed by the software. - */ -#endif - dev->base_addr = (unsigned long)ep; #if 0 dev->name = "CPM_ENET"; diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index abc877faf12..0f1863ed9c1 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -372,22 +372,6 @@ config MPC8XXFADS bool "FADS" select FADS -config MPC86XADS - bool "MPC86XADS" - help - MPC86x Application Development System by Freescale Semiconductor. - The MPC86xADS is meant to serve as a platform for s/w and h/w - development around the MPC86X processor families. - select FADS - -config MPC885ADS - bool "MPC885ADS" - help - Freescale Semiconductor MPC885 Application Development System (ADS). - Also known as DUET. - The MPC885ADS is meant to serve as a platform for s/w and h/w - development around the MPC885 processor family. - config TQM823L bool "TQM823L" help @@ -479,53 +463,6 @@ config WINCEPT endchoice -menu "Freescale Ethernet driver platform-specific options" - depends on FS_ENET - - config MPC8xx_SECOND_ETH - bool "Second Ethernet channel" - depends on (MPC885ADS || MPC86XADS) - default y - help - This enables support for second Ethernet on MPC885ADS and MPC86xADS boards. - The latter will use SCC1, for 885ADS you can select it below. - - choice - prompt "Second Ethernet channel" - depends on MPC8xx_SECOND_ETH - default MPC8xx_SECOND_ETH_FEC2 - - config MPC8xx_SECOND_ETH_FEC2 - bool "FEC2" - depends on MPC885ADS - help - Enable FEC2 to serve as 2-nd Ethernet channel. Note that SMC2 - (often 2-nd UART) will not work if this is enabled. - - config MPC8xx_SECOND_ETH_SCC1 - bool "SCC1" - depends on MPC86XADS - select MPC8xx_SCC_ENET_FIXED - help - Enable SCC1 to serve as 2-nd Ethernet channel. Note that SMC1 - (often 1-nd UART) will not work if this is enabled. - - config MPC8xx_SECOND_ETH_SCC3 - bool "SCC3" - depends on MPC885ADS - help - Enable SCC3 to serve as 2-nd Ethernet channel. Note that SMC1 - (often 1-nd UART) will not work if this is enabled. - - endchoice - - config MPC8xx_SCC_ENET_FIXED - depends on MPC8xx_SECOND_ETH_SCC - default n - bool "Use fixed MII-less mode for SCC Ethernet" - -endmenu - choice prompt "Machine Type" depends on 6xx @@ -666,9 +603,6 @@ config TQM8260 End of Life: not yet :-) URL: <http://www.denx.de/PDF/TQM82xx_SPEC_Rev005.pdf> -config ADS8272 - bool "ADS8272" - config PQ2FADS bool "Freescale-PQ2FADS" help @@ -698,11 +632,6 @@ config EV64360 platform. endchoice -config PQ2ADS - bool - depends on ADS8272 - default y - config TQM8xxL bool depends on 8xx && (TQM823L || TQM850L || FPS850L || TQM855L || TQM860L) @@ -725,15 +654,6 @@ config 8260 this option means that you wish to build a kernel for a machine with an 8260 class CPU. -config 8272 - bool - depends on 6xx - default y if ADS8272 - select 8260 - help - The MPC8272 CPM has a different internal dpram setup than other CPM2 - devices - config CPM1 bool depends on 8xx @@ -1069,7 +989,7 @@ config PCI_8260 config 8260_PCI9 bool "Enable workaround for MPC826x erratum PCI 9" - depends on PCI_8260 && !ADS8272 + depends on PCI_8260 default y choice diff --git a/arch/ppc/configs/ads8272_defconfig b/arch/ppc/configs/ads8272_defconfig deleted file mode 100644 index 6619f9118b0..00000000000 --- a/arch/ppc/configs/ads8272_defconfig +++ /dev/null @@ -1,930 +0,0 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.21-rc5 -# Wed Apr 4 20:55:16 2007 -# -CONFIG_MMU=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_ARCH_HAS_ILOG2_U32=y -# CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_PPC=y -CONFIG_PPC32=y -CONFIG_GENERIC_NVRAM=y -CONFIG_GENERIC_FIND_NEXT_BIT=y -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_GENERIC_BUG=y -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y -CONFIG_BROKEN_ON_SMP=y -CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -# CONFIG_IPC_NS is not set -CONFIG_SYSVIPC_SYSCTL=y -# CONFIG_POSIX_MQUEUE is not set -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_UTS_NS is not set -# CONFIG_AUDIT is not set -# CONFIG_IKCONFIG is not set -CONFIG_SYSFS_DEPRECATED=y -# CONFIG_RELAY is not set -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SYSCTL=y -CONFIG_EMBEDDED=y -CONFIG_SYSCTL_SYSCALL=y -# CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -# CONFIG_EPOLL is not set -CONFIG_SHMEM=y -CONFIG_SLAB=y -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set -CONFIG_BASE_SMALL=0 -# CONFIG_SLOB is not set - -# -# Loadable module support -# -# CONFIG_MODULES is not set - -# -# Block layer -# -CONFIG_BLOCK=y -# CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_LSF is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set -# CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y -# CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="cfq" - -# -# Processor -# -CONFIG_6xx=y -# CONFIG_40x is not set -# CONFIG_44x is not set -# CONFIG_8xx is not set -# CONFIG_E200 is not set -# CONFIG_E500 is not set -CONFIG_PPC_FPU=y -# CONFIG_PPC_DCR_NATIVE is not set -# CONFIG_KEXEC is not set -# CONFIG_CPU_FREQ is not set -# CONFIG_WANT_EARLY_SERIAL is not set -CONFIG_EMBEDDEDBOOT=y -CONFIG_PPC_STD_MMU=y - -# -# Platform options -# - -# -# Freescale Ethernet driver platform-specific options -# -# CONFIG_PPC_PREP is not set -# CONFIG_APUS is not set -# CONFIG_KATANA is not set -# CONFIG_WILLOW is not set -# CONFIG_CPCI690 is not set -# CONFIG_POWERPMC250 is not set -# CONFIG_CHESTNUT is not set -# CONFIG_SPRUCE is not set -# CONFIG_HDPU is not set -# CONFIG_EV64260 is not set -# CONFIG_LOPEC is not set -# CONFIG_MVME5100 is not set -# CONFIG_PPLUS is not set -# CONFIG_PRPMC750 is not set -# CONFIG_PRPMC800 is not set -# CONFIG_SANDPOINT is not set -# CONFIG_RADSTONE_PPC7D is not set -# CONFIG_PAL4 is not set -# CONFIG_EST8260 is not set -# CONFIG_SBC82xx is not set -# CONFIG_SBS8260 is not set -# CONFIG_RPX8260 is not set -# CONFIG_TQM8260 is not set -CONFIG_ADS8272=y -# CONFIG_PQ2FADS is not set -# CONFIG_LITE5200 is not set -# CONFIG_MPC834x_SYS is not set -# CONFIG_EV64360 is not set -CONFIG_PQ2ADS=y -CONFIG_8260=y -CONFIG_8272=y -CONFIG_CPM2=y -# CONFIG_PC_KEYBOARD is not set -# CONFIG_SMP is not set -# CONFIG_HIGHMEM is not set -CONFIG_ARCH_POPULATES_NODE_MAP=y -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y -# CONFIG_HZ_300 is not set -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set -# CONFIG_PREEMPT is not set -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y -# CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -# CONFIG_SPARSEMEM_STATIC is not set -CONFIG_SPLIT_PTLOCK_CPUS=4 -# CONFIG_RESOURCES_64BIT is not set -CONFIG_ZONE_DMA_FLAG=1 -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -# CONFIG_CMDLINE_BOOL is not set -# CONFIG_PM is not set -CONFIG_SECCOMP=y -CONFIG_ISA_DMA_API=y - -# -# Bus options -# -CONFIG_ZONE_DMA=y -# CONFIG_PPC_I8259 is not set -CONFIG_PPC_INDIRECT_PCI=y -CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -CONFIG_PCI_8260=y - -# -# PCCARD (PCMCIA/CardBus) support -# - -# -# Advanced setup -# -# CONFIG_ADVANCED_OPTIONS is not set - -# -# Default settings for advanced configuration options are used -# -CONFIG_HIGHMEM_START=0xfe000000 -CONFIG_LOWMEM_SIZE=0x30000000 -CONFIG_KERNEL_START=0xc0000000 -CONFIG_TASK_SIZE=0x80000000 -CONFIG_BOOT_LOAD=0x00400000 - -# -# Networking -# -CONFIG_NET=y - -# -# Networking options -# -# CONFIG_NETDEBUG is not set -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -CONFIG_XFRM=y -# CONFIG_XFRM_USER is not set -# CONFIG_XFRM_SUB_POLICY is not set -# CONFIG_XFRM_MIGRATE is not set -# CONFIG_NET_KEY is not set -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_FIB_HASH=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set -# CONFIG_ARPD is not set -CONFIG_SYN_COOKIES=y -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_XFRM_TUNNEL is not set -# CONFIG_INET_TUNNEL is not set -CONFIG_INET_XFRM_MODE_TRANSPORT=y -CONFIG_INET_XFRM_MODE_TUNNEL=y -CONFIG_INET_XFRM_MODE_BEET=y -CONFIG_INET_DIAG=y -CONFIG_INET_TCP_DIAG=y -# CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_CUBIC=y -CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set -# CONFIG_IPV6 is not set -# CONFIG_INET6_XFRM_TUNNEL is not set -# CONFIG_INET6_TUNNEL is not set -# CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set - -# -# DCCP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_DCCP is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set - -# -# TIPC Configuration (EXPERIMENTAL) -# -# CONFIG_TIPC is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -# CONFIG_IEEE80211 is not set - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_SYS_HYPERVISOR is not set - -# -# Connector - unified userspace <-> kernelspace linker -# -# CONFIG_CONNECTOR is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Plug and Play support -# -# CONFIG_PNPACPI is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -# CONFIG_BLK_DEV_COW_COMMON is not set -CONFIG_BLK_DEV_LOOP=y -# CONFIG_BLK_DEV_CRYPTOLOOP is not set -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_SX8 is not set -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 -# CONFIG_CDROM_PKTCDVD is not set -# CONFIG_ATA_OVER_ETH is not set - -# -# Misc devices -# -# CONFIG_SGI_IOC4 is not set -# CONFIG_TIFM_CORE is not set - -# -# ATA/ATAPI/MFM/RLL support -# -# CONFIG_IDE is not set - -# -# SCSI device support -# -# CONFIG_RAID_ATTRS is not set -# CONFIG_SCSI is not set -# CONFIG_SCSI_NETLINK is not set - -# -# Serial ATA (prod) and Parallel ATA (experimental) drivers -# -# CONFIG_ATA is not set - -# -# Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set - -# -# Fusion MPT device support -# -# CONFIG_FUSION is not set - -# -# IEEE 1394 (FireWire) support -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# -# CONFIG_I2O is not set - -# -# Macintosh device drivers -# -# CONFIG_MAC_EMUMOUSEBTN is not set -# CONFIG_WINDFARM is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set - -# -# PHY device support -# -CONFIG_PHYLIB=y - -# -# MII PHY device drivers -# -# CONFIG_MARVELL_PHY is not set -CONFIG_DAVICOM_PHY=y -# CONFIG_QSEMI_PHY is not set -# CONFIG_LXT_PHY is not set -# CONFIG_CICADA_PHY is not set -# CONFIG_VITESSE_PHY is not set -# CONFIG_SMSC_PHY is not set -# CONFIG_BROADCOM_PHY is not set -# CONFIG_FIXED_PHY is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -# CONFIG_HAPPYMEAL is not set -# CONFIG_SUNGEM is not set -# CONFIG_CASSINI is not set -# CONFIG_NET_VENDOR_3COM is not set - -# -# Tulip family network device support -# -# CONFIG_NET_TULIP is not set -# CONFIG_HP100 is not set -# CONFIG_NET_PCI is not set -CONFIG_FS_ENET=y -# CONFIG_FS_ENET_HAS_SCC is not set -CONFIG_FS_ENET_HAS_FCC=y - -# -# Ethernet (1000 Mbit) -# -# CONFIG_ACENIC is not set -# CONFIG_DL2K is not set -# CONFIG_E1000 is not set -# CONFIG_NS83820 is not set -# CONFIG_HAMACHI is not set -# CONFIG_YELLOWFIN is not set -# CONFIG_R8169 is not set -# CONFIG_SIS190 is not set -# CONFIG_SKGE is not set -# CONFIG_SKY2 is not set -# CONFIG_SK98LIN is not set -# CONFIG_TIGON3 is not set -# CONFIG_BNX2 is not set -# CONFIG_QLA3XXX is not set -# CONFIG_ATL1 is not set - -# -# Ethernet (10000 Mbit) -# -# CONFIG_CHELSIO_T1 is not set -# CONFIG_CHELSIO_T3 is not set -# CONFIG_IXGB is not set -# CONFIG_S2IO is not set -# CONFIG_MYRI10GE is not set -# CONFIG_NETXEN_NIC is not set - -# -# Token Ring devices -# -# CONFIG_TR is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set -# CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# Telephony Support -# -# CONFIG_PHONE is not set - -# -# Input device support -# -CONFIG_INPUT=y -# CONFIG_INPUT_FF_MEMLESS is not set - -# -# Userland interfaces -# -# CONFIG_INPUT_MOUSEDEV is not set -# CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set -# CONFIG_INPUT_EVDEV is not set -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set - -# -# Hardware I/O ports -# -# CONFIG_SERIO is not set -# CONFIG_GAMEPORT is not set - -# -# Character devices -# -# CONFIG_VT is not set -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -# CONFIG_SERIAL_8250 is not set - -# -# Non-8250 serial port support -# -# CONFIG_SERIAL_UARTLITE is not set -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_CPM=y -CONFIG_SERIAL_CPM_CONSOLE=y -CONFIG_SERIAL_CPM_SCC1=y -# CONFIG_SERIAL_CPM_SCC2 is not set -# CONFIG_SERIAL_CPM_SCC3 is not set -CONFIG_SERIAL_CPM_SCC4=y -# CONFIG_SERIAL_CPM_SMC1 is not set -# CONFIG_SERIAL_CPM_SMC2 is not set -# CONFIG_SERIAL_JSM is not set -CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 - -# -# IPMI -# -# CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -CONFIG_HW_RANDOM=y -# CONFIG_NVRAM is not set -CONFIG_GEN_RTC=y -# CONFIG_GEN_RTC_X is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set -# CONFIG_AGP is not set -# CONFIG_DRM is not set -# CONFIG_RAW_DRIVER is not set - -# -# TPM devices -# -# CONFIG_TCG_TPM is not set - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# SPI support -# -# CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set - -# -# Dallas's 1-wire bus -# -# CONFIG_W1 is not set - -# -# Hardware Monitoring support -# -CONFIG_HWMON=y -# CONFIG_HWMON_VID is not set -# CONFIG_SENSORS_ABITUGURU is not set -# CONFIG_SENSORS_F71805F is not set -# CONFIG_SENSORS_PC87427 is not set -# CONFIG_SENSORS_VT1211 is not set -# CONFIG_HWMON_DEBUG_CHIP is not set - -# -# Multifunction device drivers -# -# CONFIG_MFD_SM501 is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set - -# -# Graphics support -# -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set -# CONFIG_FB is not set -# CONFIG_FB_IBM_GXT4500 is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# HID Devices -# -CONFIG_HID=y -# CONFIG_HID_DEBUG is not set - -# -# USB support -# -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y -CONFIG_USB_ARCH_HAS_EHCI=y -# CONFIG_USB is not set - -# -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' -# - -# -# USB Gadget Support -# -# CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# -# CONFIG_MMC is not set - -# -# LED devices -# -# CONFIG_NEW_LEDS is not set - -# -# LED drivers -# - -# -# LED Triggers -# - -# -# InfiniBand support -# -# CONFIG_INFINIBAND is not set - -# -# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) -# - -# -# Real Time Clock -# -# CONFIG_RTC_CLASS is not set - -# -# DMA Engine support -# -# CONFIG_DMA_ENGINE is not set - -# -# DMA Clients -# - -# -# DMA Devices -# - -# -# Auxiliary Display support -# - -# -# Virtualization -# - -# -# File systems -# -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XIP is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -# CONFIG_EXT3_FS_POSIX_ACL is not set -# CONFIG_EXT3_FS_SECURITY is not set -# CONFIG_EXT4DEV_FS is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FS_MBCACHE=y -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -CONFIG_FS_POSIX_ACL=y -# CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set -# CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -CONFIG_INOTIFY=y -CONFIG_INOTIFY_USER=y -# CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set -# CONFIG_FUSE_FS is not set - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_MSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_PROC_SYSCTL=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -# CONFIG_TMPFS_POSIX_ACL is not set -# CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y -# CONFIG_CONFIGFS_FS is not set - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -# CONFIG_NFS_DIRECTIO is not set -# CONFIG_NFSD is not set -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -CONFIG_SUNRPC_GSS=y -CONFIG_RPCSEC_GSS_KRB5=y -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set -# CONFIG_9P_FS is not set - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -# CONFIG_MSDOS_PARTITION is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_KARMA_PARTITION is not set -# CONFIG_EFI_PARTITION is not set - -# -# Native Language Support -# -# CONFIG_NLS is not set - -# -# Distributed Lock Manager -# -# CONFIG_DLM is not set -# CONFIG_SCC_ENET is not set -# CONFIG_FEC_ENET is not set - -# -# CPM2 Options -# - -# -# Library routines -# -# CONFIG_CRC_CCITT is not set -# CONFIG_CRC16 is not set -# CONFIG_CRC32 is not set -# CONFIG_LIBCRC32C is not set -CONFIG_PLIST=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT=y -# CONFIG_PROFILING is not set - -# -# Kernel hacking -# -# CONFIG_PRINTK_TIME is not set -CONFIG_ENABLE_MUST_CHECK=y -# CONFIG_MAGIC_SYSRQ is not set -# CONFIG_UNUSED_SYMBOLS is not set -# CONFIG_DEBUG_FS is not set -# CONFIG_HEADERS_CHECK is not set -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_KGDB_CONSOLE is not set - -# -# Security options -# -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_BLKCIPHER=y -CONFIG_CRYPTO_MANAGER=y -# CONFIG_CRYPTO_HMAC is not set -# CONFIG_CRYPTO_XCBC is not set -# CONFIG_CRYPTO_NULL is not set -# CONFIG_CRYPTO_MD4 is not set -CONFIG_CRYPTO_MD5=y -# CONFIG_CRYPTO_SHA1 is not set -# CONFIG_CRYPTO_SHA256 is not set -# CONFIG_CRYPTO_SHA512 is not set -# CONFIG_CRYPTO_WP512 is not set -# CONFIG_CRYPTO_TGR192 is not set -# CONFIG_CRYPTO_GF128MUL is not set -CONFIG_CRYPTO_ECB=y -CONFIG_CRYPTO_CBC=y -CONFIG_CRYPTO_PCBC=y -# CONFIG_CRYPTO_LRW is not set -CONFIG_CRYPTO_DES=y -# CONFIG_CRYPTO_FCRYPT is not set -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_TWOFISH is not set -# CONFIG_CRYPTO_SERPENT is not set -# CONFIG_CRYPTO_AES is not set -# CONFIG_CRYPTO_CAST5 is not set -# CONFIG_CRYPTO_CAST6 is not set -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_ARC4 is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_ANUBIS is not set -# CONFIG_CRYPTO_DEFLATE is not set -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_CRC32C is not set -# CONFIG_CRYPTO_CAMELLIA is not set - -# -# Hardware crypto devices -# diff --git a/arch/ppc/configs/mpc86x_ads_defconfig b/arch/ppc/configs/mpc86x_ads_defconfig deleted file mode 100644 index f63c6f59d68..00000000000 --- a/arch/ppc/configs/mpc86x_ads_defconfig +++ /dev/null @@ -1,633 +0,0 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc4 -# Tue Jun 14 13:36:35 2005 -# -CONFIG_MMU=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_HAVE_DEC_LOCK=y -CONFIG_PPC=y -CONFIG_PPC32=y -CONFIG_GENERIC_NVRAM=y -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y -# CONFIG_CLEAN_COMPILE is not set -CONFIG_BROKEN=y -CONFIG_BROKEN_ON_SMP=y -CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# -CONFIG_LOCALVERSION="" -# CONFIG_SWAP is not set -CONFIG_SYSVIPC=y -# CONFIG_POSIX_MQUEUE is not set -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -# CONFIG_AUDIT is not set -# CONFIG_HOTPLUG is not set -CONFIG_KOBJECT_UEVENT=y -# CONFIG_IKCONFIG is not set -CONFIG_EMBEDDED=y -# CONFIG_KALLSYMS is not set -CONFIG_PRINTK=y -CONFIG_BUG=y -# CONFIG_BASE_FULL is not set -CONFIG_FUTEX=y -# CONFIG_EPOLL is not set -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SHMEM is not set -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 -CONFIG_TINY_SHMEM=y -CONFIG_BASE_SMALL=1 - -# -# Loadable module support -# -CONFIG_MODULES=y -# CONFIG_MODULE_UNLOAD is not set -CONFIG_OBSOLETE_MODPARM=y -# CONFIG_MODVERSIONS is not set -# CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_KMOD is not set - -# -# Processor -# -# CONFIG_6xx is not set -# CONFIG_40x is not set -# CONFIG_44x is not set -# CONFIG_POWER3 is not set -# CONFIG_POWER4 is not set -CONFIG_8xx=y -# CONFIG_E500 is not set -# CONFIG_MATH_EMULATION is not set -# CONFIG_CPU_FREQ is not set -CONFIG_EMBEDDEDBOOT=y -# CONFIG_PM is not set -CONFIG_NOT_COHERENT_CACHE=y - -# -# Platform options -# -CONFIG_FADS=y -# CONFIG_RPXLITE is not set -# CONFIG_RPXCLASSIC is not set -# CONFIG_BSEIP is not set -# CONFIG_MPC8XXFADS is not set -CONFIG_MPC86XADS=y -# CONFIG_TQM823L is not set -# CONFIG_TQM850L is not set -# CONFIG_TQM855L is not set -# CONFIG_TQM860L is not set -# CONFIG_FPS850L is not set -# CONFIG_SPD823TS is not set -# CONFIG_IVMS8 is not set -# CONFIG_IVML24 is not set -# CONFIG_SM850 is not set -# CONFIG_HERMES_PRO is not set -# CONFIG_IP860 is not set -# CONFIG_LWMON is not set -# CONFIG_PCU_E is not set -# CONFIG_CCM is not set -# CONFIG_LANTEC is not set -# CONFIG_MBX is not set -# CONFIG_WINCEPT is not set -# CONFIG_SMP is not set -# CONFIG_PREEMPT is not set -# CONFIG_HIGHMEM is not set -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -# CONFIG_CMDLINE_BOOL is not set -CONFIG_ISA_DMA_API=y - -# -# Bus options -# -# CONFIG_PCI is not set -# CONFIG_PCI_DOMAINS is not set -# CONFIG_PCI_QSPAN is not set - -# -# PCCARD (PCMCIA/CardBus) support -# -# CONFIG_PCCARD is not set - -# -# Advanced setup -# -# CONFIG_ADVANCED_OPTIONS is not set - -# -# Default settings for advanced configuration options are used -# -CONFIG_HIGHMEM_START=0xfe000000 -CONFIG_LOWMEM_SIZE=0x30000000 -CONFIG_KERNEL_START=0xc0000000 -CONFIG_TASK_SIZE=0x80000000 -CONFIG_CONSISTENT_START=0xff100000 -CONFIG_CONSISTENT_SIZE=0x00200000 -CONFIG_BOOT_LOAD=0x00400000 - -# -# Device Drivers -# - -# -# Generic Driver Options -# -# CONFIG_STANDALONE is not set -CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Plug and Play support -# - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_DEV_COW_COMMON is not set -CONFIG_BLK_DEV_LOOP=y -# CONFIG_BLK_DEV_CRYPTOLOOP is not set -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_RAM is not set -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_INITRAMFS_SOURCE="" -# CONFIG_LBD is not set -# CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -# CONFIG_ATA_OVER_ETH is not set - -# -# ATA/ATAPI/MFM/RLL support -# -# CONFIG_IDE is not set - -# -# SCSI device support -# -# CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set - -# -# Fusion MPT device support -# - -# -# IEEE 1394 (FireWire) support -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# - -# -# Macintosh device drivers -# - -# -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_TUNNEL is not set -CONFIG_IP_TCPDIAG=y -# CONFIG_IP_TCPDIAG_IPV6 is not set -CONFIG_IPV6=m -# CONFIG_IPV6_PRIVACY is not set -# CONFIG_INET6_AH is not set -# CONFIG_INET6_ESP is not set -# CONFIG_INET6_IPCOMP is not set -# CONFIG_INET6_TUNNEL is not set -# CONFIG_IPV6_TUNNEL is not set -# CONFIG_NETFILTER is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set -# CONFIG_NET_CLS_ROUTE is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -CONFIG_NETDEVICES=y -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_MII is not set -# CONFIG_OAKNET is not set - -# -# Ethernet (1000 Mbit) -# - -# -# Ethernet (10000 Mbit) -# - -# -# Token Ring devices -# - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set -# CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# Telephony Support -# -# CONFIG_PHONE is not set - -# -# Input device support -# -# CONFIG_INPUT is not set - -# -# Hardware I/O ports -# -# CONFIG_SERIO is not set -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y - -# -# Character devices -# -# CONFIG_VT is not set -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -# CONFIG_SERIAL_8250 is not set - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_CPM=y -CONFIG_SERIAL_CPM_CONSOLE=y -# CONFIG_SERIAL_CPM_SCC1 is not set -# CONFIG_SERIAL_CPM_SCC2 is not set -# CONFIG_SERIAL_CPM_SCC3 is not set -# CONFIG_SERIAL_CPM_SCC4 is not set -CONFIG_SERIAL_CPM_SMC1=y -# CONFIG_SERIAL_CPM_SMC2 is not set -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set - -# -# IPMI -# -# CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_NVRAM is not set -# CONFIG_GEN_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_AGP is not set -# CONFIG_DRM is not set -# CONFIG_RAW_DRIVER is not set - -# -# TPM devices -# - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Dallas's 1-wire bus -# -# CONFIG_W1 is not set - -# -# Misc devices -# - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set - -# -# Graphics support -# -# CONFIG_FB is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB_ARCH_HAS_HCD is not set -# CONFIG_USB_ARCH_HAS_OHCI is not set - -# -# USB Gadget Support -# -# CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# -# CONFIG_MMC is not set - -# -# InfiniBand support -# -# CONFIG_INFINIBAND is not set - -# -# File systems -# -# CONFIG_EXT2_FS is not set -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set - -# -# XFS support -# -# CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_QUOTA is not set -# CONFIG_DNOTIFY is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_MSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVPTS_FS_XATTR is not set -# CONFIG_TMPFS is not set -# CONFIG_HUGETLBFS is not set -# CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V4=y -# CONFIG_NFS_DIRECTIO is not set -# CONFIG_NFSD is not set -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_SUNRPC=y -CONFIG_SUNRPC_GSS=y -CONFIG_RPCSEC_GSS_KRB5=y -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# -# CONFIG_NLS is not set - -# -# MPC8xx CPM Options -# -CONFIG_SCC_ENET=y -CONFIG_SCC1_ENET=y -# CONFIG_SCC2_ENET is not set -# CONFIG_SCC3_ENET is not set -# CONFIG_FEC_ENET is not set -# CONFIG_ENET_BIG_BUFFERS is not set - -# -# Generic MPC8xx Options -# -# CONFIG_8xx_COPYBACK is not set -# CONFIG_8xx_CPU6 is not set -CONFIG_NO_UCODE_PATCH=y -# CONFIG_USB_SOF_UCODE_PATCH is not set -# CONFIG_I2C_SPI_UCODE_PATCH is not set -# CONFIG_I2C_SPI_SMC1_UCODE_PATCH is not set - -# -# Library routines -# -# CONFIG_CRC_CCITT is not set -# CONFIG_CRC32 is not set -# CONFIG_LIBCRC32C is not set - -# -# Profiling support -# -# CONFIG_PROFILING is not set - -# -# Kernel hacking -# -# CONFIG_PRINTK_TIME is not set -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 - -# -# Security options -# -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -# CONFIG_CRYPTO_HMAC is not set -# CONFIG_CRYPTO_NULL is not set -# CONFIG_CRYPTO_MD4 is not set -CONFIG_CRYPTO_MD5=y -# CONFIG_CRYPTO_SHA1 is not set -# CONFIG_CRYPTO_SHA256 is not set -# CONFIG_CRYPTO_SHA512 is not set -# CONFIG_CRYPTO_WP512 is not set -# CONFIG_CRYPTO_TGR192 is not set -CONFIG_CRYPTO_DES=y -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_TWOFISH is not set -# CONFIG_CRYPTO_SERPENT is not set -# CONFIG_CRYPTO_AES is not set -# CONFIG_CRYPTO_CAST5 is not set -# CONFIG_CRYPTO_CAST6 is not set -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_ARC4 is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_ANUBIS is not set -# CONFIG_CRYPTO_DEFLATE is not set -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_CRC32C is not set -# CONFIG_CRYPTO_TEST is not set - -# -# Hardware crypto devices -# diff --git a/arch/ppc/configs/mpc885ads_defconfig b/arch/ppc/configs/mpc885ads_defconfig deleted file mode 100644 index 016f94d9325..00000000000 --- a/arch/ppc/configs/mpc885ads_defconfig +++ /dev/null @@ -1,622 +0,0 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc6 -# Thu Jun 9 21:17:29 2005 -# -CONFIG_MMU=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_HAVE_DEC_LOCK=y -CONFIG_PPC=y -CONFIG_PPC32=y -CONFIG_GENERIC_NVRAM=y -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y -# CONFIG_CLEAN_COMPILE is not set -CONFIG_BROKEN=y -CONFIG_BROKEN_ON_SMP=y -CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# -CONFIG_LOCALVERSION="" -# CONFIG_SWAP is not set -CONFIG_SYSVIPC=y -# CONFIG_POSIX_MQUEUE is not set -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -# CONFIG_AUDIT is not set -CONFIG_HOTPLUG=y -CONFIG_KOBJECT_UEVENT=y -# CONFIG_IKCONFIG is not set -CONFIG_EMBEDDED=y -# CONFIG_KALLSYMS is not set -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -# CONFIG_EPOLL is not set -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SHMEM=y -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 -# CONFIG_TINY_SHMEM is not set -CONFIG_BASE_SMALL=0 - -# -# Loadable module support -# -# CONFIG_MODULES is not set - -# -# Processor -# -# CONFIG_6xx is not set -# CONFIG_40x is not set -# CONFIG_44x is not set -# CONFIG_POWER3 is not set -# CONFIG_POWER4 is not set -CONFIG_8xx=y -# CONFIG_E500 is not set -# CONFIG_MATH_EMULATION is not set -# CONFIG_CPU_FREQ is not set -CONFIG_EMBEDDEDBOOT=y -# CONFIG_PM is not set -CONFIG_NOT_COHERENT_CACHE=y - -# -# Platform options -# -# CONFIG_RPXLITE is not set -# CONFIG_RPXCLASSIC is not set -# CONFIG_BSEIP is not set -# CONFIG_FADS is not set -CONFIG_MPC885ADS=y -# CONFIG_TQM823L is not set -# CONFIG_TQM850L is not set -# CONFIG_TQM855L is not set -# CONFIG_TQM860L is not set -# CONFIG_FPS850L is not set -# CONFIG_SPD823TS is not set -# CONFIG_IVMS8 is not set -# CONFIG_IVML24 is not set -# CONFIG_SM850 is not set -# CONFIG_HERMES_PRO is not set -# CONFIG_IP860 is not set -# CONFIG_LWMON is not set -# CONFIG_PCU_E is not set -# CONFIG_CCM is not set -# CONFIG_LANTEC is not set -# CONFIG_MBX is not set -# CONFIG_WINCEPT is not set -# CONFIG_SMP is not set -# CONFIG_PREEMPT is not set -# CONFIG_HIGHMEM is not set -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -# CONFIG_CMDLINE_BOOL is not set -CONFIG_ISA_DMA_API=y - -# -# Bus options -# -# CONFIG_PCI is not set -# CONFIG_PCI_DOMAINS is not set -# CONFIG_PCI_QSPAN is not set - -# -# PCCARD (PCMCIA/CardBus) support -# -# CONFIG_PCCARD is not set - -# -# Advanced setup -# -# CONFIG_ADVANCED_OPTIONS is not set - -# -# Default settings for advanced configuration options are used -# -CONFIG_HIGHMEM_START=0xfe000000 -CONFIG_LOWMEM_SIZE=0x30000000 -CONFIG_KERNEL_START=0xc0000000 -CONFIG_TASK_SIZE=0x80000000 -CONFIG_CONSISTENT_START=0xff100000 -CONFIG_CONSISTENT_SIZE=0x00200000 -CONFIG_BOOT_LOAD=0x00400000 - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Plug and Play support -# - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_DEV_COW_COMMON is not set -# CONFIG_BLK_DEV_LOOP is not set -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_RAM is not set -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_INITRAMFS_SOURCE="" -# CONFIG_LBD is not set -# CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -# CONFIG_IOSCHED_AS is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set -# CONFIG_ATA_OVER_ETH is not set - -# -# ATA/ATAPI/MFM/RLL support -# -# CONFIG_IDE is not set - -# -# SCSI device support -# -# CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set - -# -# Fusion MPT device support -# - -# -# IEEE 1394 (FireWire) support -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# - -# -# Macintosh device drivers -# - -# -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_TUNNEL is not set -CONFIG_IP_TCPDIAG=y -# CONFIG_IP_TCPDIAG_IPV6 is not set -# CONFIG_IPV6 is not set -# CONFIG_NETFILTER is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set -# CONFIG_NET_CLS_ROUTE is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -CONFIG_NETDEVICES=y -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -# CONFIG_OAKNET is not set - -# -# Ethernet (1000 Mbit) -# - -# -# Ethernet (10000 Mbit) -# - -# -# Token Ring devices -# - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set -CONFIG_PPP=y -# CONFIG_PPP_MULTILINK is not set -# CONFIG_PPP_FILTER is not set -CONFIG_PPP_ASYNC=y -CONFIG_PPP_SYNC_TTY=y -CONFIG_PPP_DEFLATE=y -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -# CONFIG_SLIP is not set -# CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# Telephony Support -# -# CONFIG_PHONE is not set - -# -# Input device support -# -# CONFIG_INPUT is not set - -# -# Hardware I/O ports -# -# CONFIG_SERIO is not set -# CONFIG_GAMEPORT is not set - -# -# Character devices -# -# CONFIG_VT is not set -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -# CONFIG_SERIAL_8250 is not set - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_CPM=y -CONFIG_SERIAL_CPM_CONSOLE=y -# CONFIG_SERIAL_CPM_SCC1 is not set -# CONFIG_SERIAL_CPM_SCC2 is not set -# CONFIG_SERIAL_CPM_SCC3 is not set -# CONFIG_SERIAL_CPM_SCC4 is not set -CONFIG_SERIAL_CPM_SMC1=y -CONFIG_SERIAL_CPM_SMC2=y -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set - -# -# IPMI -# -# CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_NVRAM is not set -# CONFIG_GEN_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_AGP is not set -# CONFIG_DRM is not set -# CONFIG_RAW_DRIVER is not set - -# -# TPM devices -# - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Dallas's 1-wire bus -# -# CONFIG_W1 is not set - -# -# Misc devices -# - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set - -# -# Graphics support -# -# CONFIG_FB is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB_ARCH_HAS_HCD is not set -# CONFIG_USB_ARCH_HAS_OHCI is not set - -# -# USB Gadget Support -# -# CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# -# CONFIG_MMC is not set - -# -# InfiniBand support -# -# CONFIG_INFINIBAND is not set - -# -# File systems -# -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -# CONFIG_EXT2_FS_POSIX_ACL is not set -# CONFIG_EXT2_FS_SECURITY is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -# CONFIG_EXT3_FS_POSIX_ACL is not set -# CONFIG_EXT3_FS_SECURITY is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FS_MBCACHE=y -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set - -# -# XFS support -# -# CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_QUOTA is not set -# CONFIG_DNOTIFY is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_MSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -# CONFIG_PROC_KCORE is not set -CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVPTS_FS_XATTR is not set -# CONFIG_TMPFS is not set -# CONFIG_HUGETLBFS is not set -# CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -# CONFIG_NFS_V3 is not set -# CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set -# CONFIG_NFSD is not set -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_SUNRPC=y -# CONFIG_RPCSEC_GSS_KRB5 is not set -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_EFI_PARTITION is not set - -# -# Native Language Support -# -# CONFIG_NLS is not set - -# -# MPC8xx CPM Options -# -CONFIG_SCC_ENET=y -# CONFIG_SCC1_ENET is not set -# CONFIG_SCC2_ENET is not set -CONFIG_SCC3_ENET=y -# CONFIG_FEC_ENET is not set -# CONFIG_ENET_BIG_BUFFERS is not set - -# -# Generic MPC8xx Options -# -CONFIG_8xx_COPYBACK=y -CONFIG_8xx_CPU6=y -CONFIG_NO_UCODE_PATCH=y -# CONFIG_USB_SOF_UCODE_PATCH is not set -# CONFIG_I2C_SPI_UCODE_PATCH is not set -# CONFIG_I2C_SPI_SMC1_UCODE_PATCH is not set - -# -# Library routines -# -CONFIG_CRC_CCITT=y -# CONFIG_CRC32 is not set -# CONFIG_LIBCRC32C is not set -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y - -# -# Profiling support -# -# CONFIG_PROFILING is not set - -# -# Kernel hacking -# -# CONFIG_PRINTK_TIME is not set -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 - -# -# Security options -# -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -# CONFIG_CRYPTO is not set - -# -# Hardware crypto devices -# diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c index 2ba659f401b..d9036ef0b65 100644 --- a/arch/ppc/kernel/ppc_ksyms.c +++ b/arch/ppc/kernel/ppc_ksyms.c @@ -88,6 +88,7 @@ EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strcat); EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy_generic); diff --git a/arch/ppc/lib/string.S b/arch/ppc/lib/string.S index 84ed33ab4c2..927253bfc82 100644 --- a/arch/ppc/lib/string.S +++ b/arch/ppc/lib/string.S @@ -121,6 +121,20 @@ _GLOBAL(strcmp) beq 1b blr +_GLOBAL(strncmp) + PPC_LCMPI r5,0 + beqlr + mtctr r5 + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r5) + cmpwi 1,r3,0 + lbzu r0,1(r4) + subf. r3,r0,r3 + beqlr 1 + bdnzt eq,1b + blr + _GLOBAL(strlen) addi r4,r3,-1 1: lbzu r0,1(r4) diff --git a/arch/ppc/platforms/Makefile b/arch/ppc/platforms/Makefile index 40f53fbe6d3..6260231987c 100644 --- a/arch/ppc/platforms/Makefile +++ b/arch/ppc/platforms/Makefile @@ -4,7 +4,6 @@ obj-$(CONFIG_PPC_PREP) += prep_pci.o prep_setup.o obj-$(CONFIG_PREP_RESIDUAL) += residual.o -obj-$(CONFIG_PQ2ADS) += pq2ads.o obj-$(CONFIG_TQM8260) += tqm8260_setup.o obj-$(CONFIG_CPCI690) += cpci690.o obj-$(CONFIG_EV64260) += ev64260.o @@ -24,6 +23,3 @@ obj-$(CONFIG_SBC82xx) += sbc82xx.o obj-$(CONFIG_SPRUCE) += spruce.o obj-$(CONFIG_LITE5200) += lite5200.o obj-$(CONFIG_EV64360) += ev64360.o -obj-$(CONFIG_MPC86XADS) += mpc866ads_setup.o -obj-$(CONFIG_MPC885ADS) += mpc885ads_setup.o -obj-$(CONFIG_ADS8272) += mpc8272ads_setup.o diff --git a/arch/ppc/platforms/fads.h b/arch/ppc/platforms/fads.h index 2f9f0f60e3f..5219366667b 100644 --- a/arch/ppc/platforms/fads.h +++ b/arch/ppc/platforms/fads.h @@ -22,29 +22,6 @@ #include <asm/ppcboot.h> -#if defined(CONFIG_MPC86XADS) - -#define BOARD_CHIP_NAME "MPC86X" - -/* U-Boot maps BCSR to 0xff080000 */ -#define BCSR_ADDR ((uint)0xff080000) - -/* MPC86XADS has one more CPLD and an additional BCSR. - */ -#define CFG_PHYDEV_ADDR ((uint)0xff0a0000) -#define BCSR5 ((uint)(CFG_PHYDEV_ADDR + 0x300)) - -#define BCSR5_T1_RST 0x10 -#define BCSR5_ATM155_RST 0x08 -#define BCSR5_ATM25_RST 0x04 -#define BCSR5_MII1_EN 0x02 -#define BCSR5_MII1_RST 0x01 - -/* There is no PHY link change interrupt */ -#define PHY_INTERRUPT (-1) - -#else /* FADS */ - /* Memory map is configured by the PROM startup. * I tried to follow the FADS manual, although the startup PROM * dictates this and we simply have to move some of the physical @@ -55,8 +32,6 @@ /* PHY link change interrupt */ #define PHY_INTERRUPT SIU_IRQ2 -#endif /* CONFIG_MPC86XADS */ - #define BCSR_SIZE ((uint)(64 * 1024)) #define BCSR0 ((uint)(BCSR_ADDR + 0x00)) #define BCSR1 ((uint)(BCSR_ADDR + 0x04)) diff --git a/arch/ppc/platforms/mpc8272ads_setup.c b/arch/ppc/platforms/mpc8272ads_setup.c deleted file mode 100644 index 47f4b38edb5..00000000000 --- a/arch/ppc/platforms/mpc8272ads_setup.c +++ /dev/null @@ -1,367 +0,0 @@ -/* - * arch/ppc/platforms/mpc8272ads_setup.c - * - * MPC82xx Board-specific PlatformDevice descriptions - * - * 2005 (c) MontaVista Software, Inc. - * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/device.h> -#include <linux/ioport.h> -#include <linux/fs_enet_pd.h> -#include <linux/platform_device.h> -#include <linux/phy.h> - -#include <asm/io.h> -#include <asm/mpc8260.h> -#include <asm/cpm2.h> -#include <asm/immap_cpm2.h> -#include <asm/irq.h> -#include <asm/ppc_sys.h> -#include <asm/ppcboot.h> -#include <linux/fs_uart_pd.h> - -#include "pq2ads_pd.h" - -static void init_fcc1_ioports(struct fs_platform_info*); -static void init_fcc2_ioports(struct fs_platform_info*); -static void init_scc1_uart_ioports(struct fs_uart_platform_info*); -static void init_scc4_uart_ioports(struct fs_uart_platform_info*); - -static struct fs_uart_platform_info mpc8272_uart_pdata[] = { - [fsid_scc1_uart] = { - .init_ioports = init_scc1_uart_ioports, - .fs_no = fsid_scc1_uart, - .brg = 1, - .tx_num_fifo = 4, - .tx_buf_size = 32, - .rx_num_fifo = 4, - .rx_buf_size = 32, - }, - [fsid_scc4_uart] = { - .init_ioports = init_scc4_uart_ioports, - .fs_no = fsid_scc4_uart, - .brg = 4, - .tx_num_fifo = 4, - .tx_buf_size = 32, - .rx_num_fifo = 4, - .rx_buf_size = 32, - }, -}; - -static struct fs_mii_bb_platform_info m82xx_mii_bb_pdata = { - .mdio_dat.bit = 18, - .mdio_dir.bit = 18, - .mdc_dat.bit = 19, - .delay = 1, -}; - -static struct fs_platform_info mpc82xx_enet_pdata[] = { - [fsid_fcc1] = { - .fs_no = fsid_fcc1, - .cp_page = CPM_CR_FCC1_PAGE, - .cp_block = CPM_CR_FCC1_SBLOCK, - - .clk_trx = (PC_F1RXCLK | PC_F1TXCLK), - .clk_route = CMX1_CLK_ROUTE, - .clk_mask = CMX1_CLK_MASK, - .init_ioports = init_fcc1_ioports, - - .mem_offset = FCC1_MEM_OFFSET, - - .rx_ring = 32, - .tx_ring = 32, - .rx_copybreak = 240, - .use_napi = 0, - .napi_weight = 17, - .bus_id = "0:00", - }, - [fsid_fcc2] = { - .fs_no = fsid_fcc2, - .cp_page = CPM_CR_FCC2_PAGE, - .cp_block = CPM_CR_FCC2_SBLOCK, - .clk_trx = (PC_F2RXCLK | PC_F2TXCLK), - .clk_route = CMX2_CLK_ROUTE, - .clk_mask = CMX2_CLK_MASK, - .init_ioports = init_fcc2_ioports, - - .mem_offset = FCC2_MEM_OFFSET, - - .rx_ring = 32, - .tx_ring = 32, - .rx_copybreak = 240, - .use_napi = 0, - .napi_weight = 17, - .bus_id = "0:03", - }, -}; - -static void init_fcc1_ioports(struct fs_platform_info* pdata) -{ - struct io_port *io; - u32 tempval; - cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t)); - u32 *bcsr = ioremap(BCSR_ADDR+4, sizeof(u32)); - - io = &immap->im_ioport; - - /* Enable the PHY */ - clrbits32(bcsr, BCSR1_FETHIEN); - setbits32(bcsr, BCSR1_FETH_RST); - - /* FCC1 pins are on port A/C. */ - /* Configure port A and C pins for FCC1 Ethernet. */ - - tempval = in_be32(&io->iop_pdira); - tempval &= ~PA1_DIRA0; - tempval |= PA1_DIRA1; - out_be32(&io->iop_pdira, tempval); - - tempval = in_be32(&io->iop_psora); - tempval &= ~PA1_PSORA0; - tempval |= PA1_PSORA1; - out_be32(&io->iop_psora, tempval); - - setbits32(&io->iop_ppara,PA1_DIRA0 | PA1_DIRA1); - - /* Alter clocks */ - tempval = PC_F1TXCLK|PC_F1RXCLK; - - clrbits32(&io->iop_psorc, tempval); - clrbits32(&io->iop_pdirc, tempval); - setbits32(&io->iop_pparc, tempval); - - clrbits32(&immap->im_cpmux.cmx_fcr, CMX1_CLK_MASK); - setbits32(&immap->im_cpmux.cmx_fcr, CMX1_CLK_ROUTE); - iounmap(bcsr); - iounmap(immap); -} - -static void init_fcc2_ioports(struct fs_platform_info* pdata) -{ - cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t)); - u32 *bcsr = ioremap(BCSR_ADDR+12, sizeof(u32)); - - struct io_port *io; - u32 tempval; - - immap = cpm2_immr; - - io = &immap->im_ioport; - - /* Enable the PHY */ - clrbits32(bcsr, BCSR3_FETHIEN2); - setbits32(bcsr, BCSR3_FETH2_RST); - - /* FCC2 are port B/C. */ - /* Configure port A and C pins for FCC2 Ethernet. */ - - tempval = in_be32(&io->iop_pdirb); - tempval &= ~PB2_DIRB0; - tempval |= PB2_DIRB1; - out_be32(&io->iop_pdirb, tempval); - - tempval = in_be32(&io->iop_psorb); - tempval &= ~PB2_PSORB0; - tempval |= PB2_PSORB1; - out_be32(&io->iop_psorb, tempval); - - setbits32(&io->iop_pparb,PB2_DIRB0 | PB2_DIRB1); - - tempval = PC_F2RXCLK|PC_F2TXCLK; - - /* Alter clocks */ - clrbits32(&io->iop_psorc,tempval); - clrbits32(&io->iop_pdirc,tempval); - setbits32(&io->iop_pparc,tempval); - - clrbits32(&immap->im_cpmux.cmx_fcr, CMX2_CLK_MASK); - setbits32(&immap->im_cpmux.cmx_fcr, CMX2_CLK_ROUTE); - - iounmap(bcsr); - iounmap(immap); -} - - -static void __init mpc8272ads_fixup_enet_pdata(struct platform_device *pdev, - int idx) -{ - bd_t* bi = (void*)__res; - int fs_no = fsid_fcc1+pdev->id-1; - - if(fs_no >= ARRAY_SIZE(mpc82xx_enet_pdata)) { - return; - } - - mpc82xx_enet_pdata[fs_no].dpram_offset= - (u32)cpm2_immr->im_dprambase; - mpc82xx_enet_pdata[fs_no].fcc_regs_c = - (u32)cpm2_immr->im_fcc_c; - memcpy(&mpc82xx_enet_pdata[fs_no].macaddr,bi->bi_enetaddr,6); - - /* prevent dup mac */ - if(fs_no == fsid_fcc2) - mpc82xx_enet_pdata[fs_no].macaddr[5] ^= 1; - - pdev->dev.platform_data = &mpc82xx_enet_pdata[fs_no]; -} - -static void mpc8272ads_fixup_uart_pdata(struct platform_device *pdev, - int idx) -{ - bd_t *bd = (bd_t *) __res; - struct fs_uart_platform_info *pinfo; - int num = ARRAY_SIZE(mpc8272_uart_pdata); - int id = fs_uart_id_scc2fsid(idx); - - /* no need to alter anything if console */ - if ((id < num) && (!pdev->dev.platform_data)) { - pinfo = &mpc8272_uart_pdata[id]; - pinfo->uart_clk = bd->bi_intfreq; - pdev->dev.platform_data = pinfo; - } -} - -static void init_scc1_uart_ioports(struct fs_uart_platform_info* pdata) -{ - cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t)); - - /* SCC1 is only on port D */ - setbits32(&immap->im_ioport.iop_ppard,0x00000003); - clrbits32(&immap->im_ioport.iop_psord,0x00000001); - setbits32(&immap->im_ioport.iop_psord,0x00000002); - clrbits32(&immap->im_ioport.iop_pdird,0x00000001); - setbits32(&immap->im_ioport.iop_pdird,0x00000002); - - /* Wire BRG1 to SCC1 */ - clrbits32(&immap->im_cpmux.cmx_scr,0x00ffffff); - - iounmap(immap); -} - -static void init_scc4_uart_ioports(struct fs_uart_platform_info* pdata) -{ - cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t)); - - setbits32(&immap->im_ioport.iop_ppard,0x00000600); - clrbits32(&immap->im_ioport.iop_psord,0x00000600); - clrbits32(&immap->im_ioport.iop_pdird,0x00000200); - setbits32(&immap->im_ioport.iop_pdird,0x00000400); - - /* Wire BRG4 to SCC4 */ - clrbits32(&immap->im_cpmux.cmx_scr,0x000000ff); - setbits32(&immap->im_cpmux.cmx_scr,0x0000001b); - - iounmap(immap); -} - -static void __init mpc8272ads_fixup_mdio_pdata(struct platform_device *pdev, - int idx) -{ - m82xx_mii_bb_pdata.irq[0] = PHY_INTERRUPT; - m82xx_mii_bb_pdata.irq[1] = PHY_POLL; - m82xx_mii_bb_pdata.irq[2] = PHY_POLL; - m82xx_mii_bb_pdata.irq[3] = PHY_INTERRUPT; - m82xx_mii_bb_pdata.irq[31] = PHY_POLL; - - - m82xx_mii_bb_pdata.mdio_dat.offset = - (u32)&cpm2_immr->im_ioport.iop_pdatc; - - m82xx_mii_bb_pdata.mdio_dir.offset = - (u32)&cpm2_immr->im_ioport.iop_pdirc; - - m82xx_mii_bb_pdata.mdc_dat.offset = - (u32)&cpm2_immr->im_ioport.iop_pdatc; - - - pdev->dev.platform_data = &m82xx_mii_bb_pdata; -} - -static int mpc8272ads_platform_notify(struct device *dev) -{ - static const struct platform_notify_dev_map dev_map[] = { - { - .bus_id = "fsl-cpm-fcc", - .rtn = mpc8272ads_fixup_enet_pdata, - }, - { - .bus_id = "fsl-cpm-scc:uart", - .rtn = mpc8272ads_fixup_uart_pdata, - }, - { - .bus_id = "fsl-bb-mdio", - .rtn = mpc8272ads_fixup_mdio_pdata, - }, - { - .bus_id = NULL - } - }; - platform_notify_map(dev_map,dev); - - return 0; - -} - -int __init mpc8272ads_init(void) -{ - printk(KERN_NOTICE "mpc8272ads: Init\n"); - - platform_notify = mpc8272ads_platform_notify; - - ppc_sys_device_initfunc(); - - ppc_sys_device_disable_all(); - ppc_sys_device_enable(MPC82xx_CPM_FCC1); - ppc_sys_device_enable(MPC82xx_CPM_FCC2); - - /* to be ready for console, let's attach pdata here */ -#ifdef CONFIG_SERIAL_CPM_SCC1 - ppc_sys_device_setfunc(MPC82xx_CPM_SCC1, PPC_SYS_FUNC_UART); - ppc_sys_device_enable(MPC82xx_CPM_SCC1); - -#endif - -#ifdef CONFIG_SERIAL_CPM_SCC4 - ppc_sys_device_setfunc(MPC82xx_CPM_SCC4, PPC_SYS_FUNC_UART); - ppc_sys_device_enable(MPC82xx_CPM_SCC4); -#endif - - ppc_sys_device_enable(MPC82xx_MDIO_BB); - - return 0; -} - -/* - To prevent confusion, console selection is gross: - by 0 assumed SCC1 and by 1 assumed SCC4 - */ -struct platform_device* early_uart_get_pdev(int index) -{ - bd_t *bd = (bd_t *) __res; - struct fs_uart_platform_info *pinfo; - - struct platform_device* pdev = NULL; - if(index) { /*assume SCC4 here*/ - pdev = &ppc_sys_platform_devices[MPC82xx_CPM_SCC4]; - pinfo = &mpc8272_uart_pdata[fsid_scc4_uart]; - } else { /*over SCC1*/ - pdev = &ppc_sys_platform_devices[MPC82xx_CPM_SCC1]; - pinfo = &mpc8272_uart_pdata[fsid_scc1_uart]; - } - - pinfo->uart_clk = bd->bi_intfreq; - pdev->dev.platform_data = pinfo; - ppc_sys_fixup_mem_resource(pdev, CPM_MAP_ADDR); - return NULL; -} - -arch_initcall(mpc8272ads_init); diff --git a/arch/ppc/platforms/mpc885ads.h b/arch/ppc/platforms/mpc885ads.h deleted file mode 100644 index d3bbbb3c9a1..00000000000 --- a/arch/ppc/platforms/mpc885ads.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * A collection of structures, addresses, and values associated with - * the Freescale MPC885ADS board. - * Copied from the FADS stuff. - * - * Author: MontaVista Software, Inc. - * source@mvista.com - * - * 2005 (c) MontaVista Software, Inc. This file is licensed under the - * terms of the GNU General Public License version 2. This program is licensed - * "as is" without any warranty of any kind, whether express or implied. - */ - -#ifdef __KERNEL__ -#ifndef __ASM_MPC885ADS_H__ -#define __ASM_MPC885ADS_H__ - - -#include <asm/ppcboot.h> - -/* U-Boot maps BCSR to 0xff080000 */ -#define BCSR_ADDR ((uint)0xff080000) -#define BCSR_SIZE ((uint)32) -#define BCSR0 ((uint)(BCSR_ADDR + 0x00)) -#define BCSR1 ((uint)(BCSR_ADDR + 0x04)) -#define BCSR2 ((uint)(BCSR_ADDR + 0x08)) -#define BCSR3 ((uint)(BCSR_ADDR + 0x0c)) -#define BCSR4 ((uint)(BCSR_ADDR + 0x10)) - -#define CFG_PHYDEV_ADDR ((uint)0xff0a0000) -#define BCSR5 ((uint)(CFG_PHYDEV_ADDR + 0x300)) - -#define IMAP_ADDR ((uint)0xff000000) -#define IMAP_SIZE ((uint)(64 * 1024)) - -#define PCMCIA_MEM_ADDR ((uint)0xff020000) -#define PCMCIA_MEM_SIZE ((uint)(64 * 1024)) - -/* Bits of interest in the BCSRs. - */ -#define BCSR1_ETHEN ((uint)0x20000000) -#define BCSR1_IRDAEN ((uint)0x10000000) -#define BCSR1_RS232EN_1 ((uint)0x01000000) -#define BCSR1_PCCEN ((uint)0x00800000) -#define BCSR1_PCCVCC0 ((uint)0x00400000) -#define BCSR1_PCCVPP0 ((uint)0x00200000) -#define BCSR1_PCCVPP1 ((uint)0x00100000) -#define BCSR1_PCCVPP_MASK (BCSR1_PCCVPP0 | BCSR1_PCCVPP1) -#define BCSR1_RS232EN_2 ((uint)0x00040000) -#define BCSR1_PCCVCC1 ((uint)0x00010000) -#define BCSR1_PCCVCC_MASK (BCSR1_PCCVCC0 | BCSR1_PCCVCC1) - -#define BCSR4_ETH10_RST ((uint)0x80000000) /* 10Base-T PHY reset*/ -#define BCSR4_USB_LO_SPD ((uint)0x04000000) -#define BCSR4_USB_VCC ((uint)0x02000000) -#define BCSR4_USB_FULL_SPD ((uint)0x00040000) -#define BCSR4_USB_EN ((uint)0x00020000) - -#define BCSR5_MII2_EN 0x40 -#define BCSR5_MII2_RST 0x20 -#define BCSR5_T1_RST 0x10 -#define BCSR5_ATM155_RST 0x08 -#define BCSR5_ATM25_RST 0x04 -#define BCSR5_MII1_EN 0x02 -#define BCSR5_MII1_RST 0x01 - -/* Interrupt level assignments */ -#define PHY_INTERRUPT SIU_IRQ7 /* PHY link change interrupt */ -#define SIU_INT_FEC1 SIU_LEVEL1 /* FEC1 interrupt */ -#define SIU_INT_FEC2 SIU_LEVEL3 /* FEC2 interrupt */ -#define FEC_INTERRUPT SIU_INT_FEC1 /* FEC interrupt */ - -/* We don't use the 8259 */ -#define NR_8259_INTS 0 - -/* CPM Ethernet through SCC3 */ -#define PA_ENET_RXD ((ushort)0x0040) -#define PA_ENET_TXD ((ushort)0x0080) -#define PE_ENET_TCLK ((uint)0x00004000) -#define PE_ENET_RCLK ((uint)0x00008000) -#define PE_ENET_TENA ((uint)0x00000010) -#define PC_ENET_CLSN ((ushort)0x0400) -#define PC_ENET_RENA ((ushort)0x0800) - -/* Control bits in the SICR to route TCLK (CLK5) and RCLK (CLK6) to - * SCC3. Also, make sure GR3 (bit 8) and SC3 (bit 9) are zero */ -#define SICR_ENET_MASK ((uint)0x00ff0000) -#define SICR_ENET_CLKRT ((uint)0x002c0000) - -#define BOARD_CHIP_NAME "MPC885" - -#endif /* __ASM_MPC885ADS_H__ */ -#endif /* __KERNEL__ */ diff --git a/arch/ppc/platforms/mpc885ads_setup.c b/arch/ppc/platforms/mpc885ads_setup.c deleted file mode 100644 index ba06cc08cda..00000000000 --- a/arch/ppc/platforms/mpc885ads_setup.c +++ /dev/null @@ -1,476 +0,0 @@ -/*arch/ppc/platforms/mpc885ads_setup.c - * - * Platform setup for the Freescale mpc885ads board - * - * Vitaly Bordug <vbordug@ru.mvista.com> - * - * Copyright 2005 MontaVista Software Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/param.h> -#include <linux/string.h> -#include <linux/ioport.h> -#include <linux/device.h> - -#include <linux/fs_enet_pd.h> -#include <linux/fs_uart_pd.h> -#include <linux/mii.h> - -#include <asm/delay.h> -#include <asm/io.h> -#include <asm/machdep.h> -#include <asm/page.h> -#include <asm/processor.h> -#include <asm/system.h> -#include <asm/time.h> -#include <asm/ppcboot.h> -#include <asm/8xx_immap.h> -#include <asm/cpm1.h> -#include <asm/ppc_sys.h> - -extern unsigned char __res[]; -static void setup_smc1_ioports(struct fs_uart_platform_info*); -static void setup_smc2_ioports(struct fs_uart_platform_info*); - -static struct fs_mii_fec_platform_info mpc8xx_mdio_fec_pdata; -static void setup_fec1_ioports(struct fs_platform_info*); -static void setup_fec2_ioports(struct fs_platform_info*); -static void setup_scc3_ioports(struct fs_platform_info*); - -static struct fs_uart_platform_info mpc885_uart_pdata[] = { - [fsid_smc1_uart] = { - .brg = 1, - .fs_no = fsid_smc1_uart, - .init_ioports = setup_smc1_ioports, - .tx_num_fifo = 4, - .tx_buf_size = 32, - .rx_num_fifo = 4, - .rx_buf_size = 32, - }, - [fsid_smc2_uart] = { - .brg = 2, - .fs_no = fsid_smc2_uart, - .init_ioports = setup_smc2_ioports, - .tx_num_fifo = 4, - .tx_buf_size = 32, - .rx_num_fifo = 4, - .rx_buf_size = 32, - }, -}; - -static struct fs_platform_info mpc8xx_enet_pdata[] = { - [fsid_fec1] = { - .rx_ring = 128, - .tx_ring = 16, - .rx_copybreak = 240, - - .use_napi = 1, - .napi_weight = 17, - - .init_ioports = setup_fec1_ioports, - - .bus_id = "0:00", - .has_phy = 1, - }, - [fsid_fec2] = { - .rx_ring = 128, - .tx_ring = 16, - .rx_copybreak = 240, - - .use_napi = 1, - .napi_weight = 17, - - .init_ioports = setup_fec2_ioports, - - .bus_id = "0:01", - .has_phy = 1, - }, - [fsid_scc3] = { - .rx_ring = 64, - .tx_ring = 8, - .rx_copybreak = 240, - - .use_napi = 1, - .napi_weight = 17, - - .init_ioports = setup_scc3_ioports, -#ifdef CONFIG_FIXED_MII_10_FDX - .bus_id = "fixed@100:1", -#else - .bus_id = "0:02", - #endif - }, -}; - -void __init board_init(void) -{ - cpm8xx_t *cp = cpmp; - unsigned int *bcsr_io; - -#ifdef CONFIG_FS_ENET - immap_t *immap = (immap_t *) IMAP_ADDR; -#endif - bcsr_io = ioremap(BCSR1, sizeof(unsigned long)); - - if (bcsr_io == NULL) { - printk(KERN_CRIT "Could not remap BCSR\n"); - return; - } -#ifdef CONFIG_SERIAL_CPM_SMC1 - cp->cp_simode &= ~(0xe0000000 >> 17); /* brg1 */ - clrbits32(bcsr_io, BCSR1_RS232EN_1); - cp->cp_smc[0].smc_smcm |= (SMCM_RX | SMCM_TX); - cp->cp_smc[0].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN); -#else - setbits32(bcsr_io,BCSR1_RS232EN_1); - cp->cp_smc[0].smc_smcmr = 0; - cp->cp_smc[0].smc_smce = 0; -#endif - -#ifdef CONFIG_SERIAL_CPM_SMC2 - cp->cp_simode &= ~(0xe0000000 >> 1); - cp->cp_simode |= (0x20000000 >> 1); /* brg2 */ - clrbits32(bcsr_io,BCSR1_RS232EN_2); - cp->cp_smc[1].smc_smcm |= (SMCM_RX | SMCM_TX); - cp->cp_smc[1].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN); -#else - setbits32(bcsr_io,BCSR1_RS232EN_2); - cp->cp_smc[1].smc_smcmr = 0; - cp->cp_smc[1].smc_smce = 0; -#endif - iounmap(bcsr_io); - -#ifdef CONFIG_FS_ENET - /* use MDC for MII (common) */ - setbits16(&immap->im_ioport.iop_pdpar, 0x0080); - clrbits16(&immap->im_ioport.iop_pddir, 0x0080); - bcsr_io = ioremap(BCSR5, sizeof(unsigned long)); - clrbits32(bcsr_io,BCSR5_MII1_EN); - clrbits32(bcsr_io,BCSR5_MII1_RST); -#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2 - clrbits32(bcsr_io,BCSR5_MII2_EN); - clrbits32(bcsr_io,BCSR5_MII2_RST); -#endif - iounmap(bcsr_io); -#endif -} - -static void setup_fec1_ioports(struct fs_platform_info* pdata) -{ - immap_t *immap = (immap_t *) IMAP_ADDR; - - /* configure FEC1 pins */ - setbits16(&immap->im_ioport.iop_papar, 0xf830); - setbits16(&immap->im_ioport.iop_padir, 0x0830); - clrbits16(&immap->im_ioport.iop_padir, 0xf000); - setbits32(&immap->im_cpm.cp_pbpar, 0x00001001); - - clrbits32(&immap->im_cpm.cp_pbdir, 0x00001001); - setbits16(&immap->im_ioport.iop_pcpar, 0x000c); - clrbits16(&immap->im_ioport.iop_pcdir, 0x000c); - setbits32(&immap->im_cpm.cp_pepar, 0x00000003); - - setbits32(&immap->im_cpm.cp_pedir, 0x00000003); - clrbits32(&immap->im_cpm.cp_peso, 0x00000003); - clrbits32(&immap->im_cpm.cp_cptr, 0x00000100); -} - -static void setup_fec2_ioports(struct fs_platform_info* pdata) -{ - immap_t *immap = (immap_t *) IMAP_ADDR; - - /* configure FEC2 pins */ - setbits32(&immap->im_cpm.cp_pepar, 0x0003fffc); - setbits32(&immap->im_cpm.cp_pedir, 0x0003fffc); - clrbits32(&immap->im_cpm.cp_peso, 0x000087fc); - setbits32(&immap->im_cpm.cp_peso, 0x00037800); - clrbits32(&immap->im_cpm.cp_cptr, 0x00000080); -} - -static void setup_scc3_ioports(struct fs_platform_info* pdata) -{ - immap_t *immap = (immap_t *) IMAP_ADDR; - unsigned *bcsr_io; - - bcsr_io = ioremap(BCSR_ADDR, BCSR_SIZE); - - if (bcsr_io == NULL) { - printk(KERN_CRIT "Could not remap BCSR\n"); - return; - } - - /* Enable the PHY. - */ - clrbits32(bcsr_io+4, BCSR4_ETH10_RST); - udelay(1000); - setbits32(bcsr_io+4, BCSR4_ETH10_RST); - /* Configure port A pins for Txd and Rxd. - */ - setbits16(&immap->im_ioport.iop_papar, PA_ENET_RXD | PA_ENET_TXD); - clrbits16(&immap->im_ioport.iop_padir, PA_ENET_RXD | PA_ENET_TXD); - - /* Configure port C pins to enable CLSN and RENA. - */ - clrbits16(&immap->im_ioport.iop_pcpar, PC_ENET_CLSN | PC_ENET_RENA); - clrbits16(&immap->im_ioport.iop_pcdir, PC_ENET_CLSN | PC_ENET_RENA); - setbits16(&immap->im_ioport.iop_pcso, PC_ENET_CLSN | PC_ENET_RENA); - - /* Configure port E for TCLK and RCLK. - */ - setbits32(&immap->im_cpm.cp_pepar, PE_ENET_TCLK | PE_ENET_RCLK); - clrbits32(&immap->im_cpm.cp_pepar, PE_ENET_TENA); - clrbits32(&immap->im_cpm.cp_pedir, - PE_ENET_TCLK | PE_ENET_RCLK | PE_ENET_TENA); - clrbits32(&immap->im_cpm.cp_peso, PE_ENET_TCLK | PE_ENET_RCLK); - setbits32(&immap->im_cpm.cp_peso, PE_ENET_TENA); - - /* Configure Serial Interface clock routing. - * First, clear all SCC bits to zero, then set the ones we want. - */ - clrbits32(&immap->im_cpm.cp_sicr, SICR_ENET_MASK); - setbits32(&immap->im_cpm.cp_sicr, SICR_ENET_CLKRT); - - /* Disable Rx and Tx. SMC1 sshould be stopped if SCC3 eternet are used. - */ - immap->im_cpm.cp_smc[0].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN); - /* On the MPC885ADS SCC ethernet PHY is initialized in the full duplex mode - * by H/W setting after reset. SCC ethernet controller support only half duplex. - * This discrepancy of modes causes a lot of carrier lost errors. - */ - - /* In the original SCC enet driver the following code is placed at - the end of the initialization */ - setbits32(&immap->im_cpm.cp_pepar, PE_ENET_TENA); - clrbits32(&immap->im_cpm.cp_pedir, PE_ENET_TENA); - setbits32(&immap->im_cpm.cp_peso, PE_ENET_TENA); - - setbits32(bcsr_io+4, BCSR1_ETHEN); - iounmap(bcsr_io); -} - -static int mac_count = 0; - -static void mpc885ads_fixup_enet_pdata(struct platform_device *pdev, int fs_no) -{ - struct fs_platform_info *fpi; - bd_t *bd = (bd_t *) __res; - char *e; - int i; - - if(fs_no >= ARRAY_SIZE(mpc8xx_enet_pdata)) { - printk(KERN_ERR"No network-suitable #%d device on bus", fs_no); - return; - } - - fpi = &mpc8xx_enet_pdata[fs_no]; - - switch (fs_no) { - case fsid_fec1: - fpi->init_ioports = &setup_fec1_ioports; - break; - case fsid_fec2: - fpi->init_ioports = &setup_fec2_ioports; - break; - case fsid_scc3: - fpi->init_ioports = &setup_scc3_ioports; - break; - default: - printk(KERN_WARNING "Device %s is not supported!\n", pdev->name); - return; - } - - pdev->dev.platform_data = fpi; - fpi->fs_no = fs_no; - - e = (unsigned char *)&bd->bi_enetaddr; - for (i = 0; i < 6; i++) - fpi->macaddr[i] = *e++; - - fpi->macaddr[5] += mac_count++; - -} - -static void mpc885ads_fixup_fec_enet_pdata(struct platform_device *pdev, - int idx) -{ - /* This is for FEC devices only */ - if (!pdev || !pdev->name || (!strstr(pdev->name, "fsl-cpm-fec"))) - return; - mpc885ads_fixup_enet_pdata(pdev, fsid_fec1 + pdev->id - 1); -} - -static void __init mpc885ads_fixup_scc_enet_pdata(struct platform_device *pdev, - int idx) -{ - /* This is for SCC devices only */ - if (!pdev || !pdev->name || (!strstr(pdev->name, "fsl-cpm-scc"))) - return; - - mpc885ads_fixup_enet_pdata(pdev, fsid_scc1 + pdev->id - 1); -} - -static void setup_smc1_ioports(struct fs_uart_platform_info* pdata) -{ - immap_t *immap = (immap_t *) IMAP_ADDR; - unsigned *bcsr_io; - unsigned int iobits = 0x000000c0; - - bcsr_io = ioremap(BCSR1, sizeof(unsigned long)); - - if (bcsr_io == NULL) { - printk(KERN_CRIT "Could not remap BCSR1\n"); - return; - } - clrbits32(bcsr_io,BCSR1_RS232EN_1); - iounmap(bcsr_io); - - setbits32(&immap->im_cpm.cp_pbpar, iobits); - clrbits32(&immap->im_cpm.cp_pbdir, iobits); - clrbits16(&immap->im_cpm.cp_pbodr, iobits); -} - -static void setup_smc2_ioports(struct fs_uart_platform_info* pdata) -{ - immap_t *immap = (immap_t *) IMAP_ADDR; - unsigned *bcsr_io; - unsigned int iobits = 0x00000c00; - - bcsr_io = ioremap(BCSR1, sizeof(unsigned long)); - - if (bcsr_io == NULL) { - printk(KERN_CRIT "Could not remap BCSR1\n"); - return; - } - clrbits32(bcsr_io,BCSR1_RS232EN_2); - iounmap(bcsr_io); - -#ifndef CONFIG_SERIAL_CPM_ALT_SMC2 - setbits32(&immap->im_cpm.cp_pbpar, iobits); - clrbits32(&immap->im_cpm.cp_pbdir, iobits); - clrbits16(&immap->im_cpm.cp_pbodr, iobits); -#else - setbits16(&immap->im_ioport.iop_papar, iobits); - clrbits16(&immap->im_ioport.iop_padir, iobits); - clrbits16(&immap->im_ioport.iop_paodr, iobits); -#endif -} - -static void __init mpc885ads_fixup_uart_pdata(struct platform_device *pdev, - int idx) -{ - bd_t *bd = (bd_t *) __res; - struct fs_uart_platform_info *pinfo; - int num = ARRAY_SIZE(mpc885_uart_pdata); - - int id = fs_uart_id_smc2fsid(idx); - - /* no need to alter anything if console */ - if ((id < num) && (!pdev->dev.platform_data)) { - pinfo = &mpc885_uart_pdata[id]; - pinfo->uart_clk = bd->bi_intfreq; - pdev->dev.platform_data = pinfo; - } -} - - -static int mpc885ads_platform_notify(struct device *dev) -{ - - static const struct platform_notify_dev_map dev_map[] = { - { - .bus_id = "fsl-cpm-fec", - .rtn = mpc885ads_fixup_fec_enet_pdata, - }, - { - .bus_id = "fsl-cpm-scc", - .rtn = mpc885ads_fixup_scc_enet_pdata, - }, - { - .bus_id = "fsl-cpm-smc:uart", - .rtn = mpc885ads_fixup_uart_pdata - }, - { - .bus_id = NULL - } - }; - - platform_notify_map(dev_map,dev); - - return 0; -} - -int __init mpc885ads_init(void) -{ - struct fs_mii_fec_platform_info* fmpi; - bd_t *bd = (bd_t *) __res; - - printk(KERN_NOTICE "mpc885ads: Init\n"); - - platform_notify = mpc885ads_platform_notify; - - ppc_sys_device_initfunc(); - ppc_sys_device_disable_all(); - - ppc_sys_device_enable(MPC8xx_CPM_FEC1); - - ppc_sys_device_enable(MPC8xx_MDIO_FEC); - fmpi = ppc_sys_platform_devices[MPC8xx_MDIO_FEC].dev.platform_data = - &mpc8xx_mdio_fec_pdata; - - fmpi->mii_speed = ((((bd->bi_intfreq + 4999999) / 2500000) / 2) & 0x3F) << 1; - - /* No PHY interrupt line here */ - fmpi->irq[0xf] = SIU_IRQ7; - -#ifdef CONFIG_MPC8xx_SECOND_ETH_SCC3 - ppc_sys_device_enable(MPC8xx_CPM_SCC3); - -#endif -#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2 - ppc_sys_device_enable(MPC8xx_CPM_FEC2); -#endif - -#ifdef CONFIG_SERIAL_CPM_SMC1 - ppc_sys_device_enable(MPC8xx_CPM_SMC1); - ppc_sys_device_setfunc(MPC8xx_CPM_SMC1, PPC_SYS_FUNC_UART); -#endif - -#ifdef CONFIG_SERIAL_CPM_SMC2 - ppc_sys_device_enable(MPC8xx_CPM_SMC2); - ppc_sys_device_setfunc(MPC8xx_CPM_SMC2, PPC_SYS_FUNC_UART); -#endif - return 0; -} - -arch_initcall(mpc885ads_init); - -/* - To prevent confusion, console selection is gross: - by 0 assumed SMC1 and by 1 assumed SMC2 - */ -struct platform_device* early_uart_get_pdev(int index) -{ - bd_t *bd = (bd_t *) __res; - struct fs_uart_platform_info *pinfo; - - struct platform_device* pdev = NULL; - if(index) { /*assume SMC2 here*/ - pdev = &ppc_sys_platform_devices[MPC8xx_CPM_SMC2]; - pinfo = &mpc885_uart_pdata[1]; - } else { /*over SMC1*/ - pdev = &ppc_sys_platform_devices[MPC8xx_CPM_SMC1]; - pinfo = &mpc885_uart_pdata[0]; - } - - pinfo->uart_clk = bd->bi_intfreq; - pdev->dev.platform_data = pinfo; - ppc_sys_fixup_mem_resource(pdev, IMAP_ADDR); - return NULL; -} - diff --git a/arch/ppc/platforms/pq2ads.c b/arch/ppc/platforms/pq2ads.c deleted file mode 100644 index 7fc2e02f524..00000000000 --- a/arch/ppc/platforms/pq2ads.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * PQ2ADS platform support - * - * Author: Kumar Gala <galak@kernel.crashing.org> - * Derived from: est8260_setup.c by Allen Curtis - * - * Copyright 2004 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/init.h> - -#include <asm/io.h> -#include <asm/mpc8260.h> -#include <asm/cpm2.h> -#include <asm/immap_cpm2.h> - -void __init -m82xx_board_setup(void) -{ - cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t)); - u32 *bcsr = ioremap(BCSR_ADDR+4, sizeof(u32)); - - /* Enable the 2nd UART port */ - clrbits32(bcsr, BCSR1_RS232_EN2); - -#ifdef CONFIG_SERIAL_CPM_SCC1 - clrbits32((u32*)&immap->im_scc[0].scc_sccm, UART_SCCM_TX | UART_SCCM_RX); - clrbits32((u32*)&immap->im_scc[0].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT); -#endif - -#ifdef CONFIG_SERIAL_CPM_SCC2 - clrbits32((u32*)&immap->im_scc[1].scc_sccm, UART_SCCM_TX | UART_SCCM_RX); - clrbits32((u32*)&immap->im_scc[1].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT); -#endif - -#ifdef CONFIG_SERIAL_CPM_SCC3 - clrbits32((u32*)&immap->im_scc[2].scc_sccm, UART_SCCM_TX | UART_SCCM_RX); - clrbits32((u32*)&immap->im_scc[2].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT); -#endif - -#ifdef CONFIG_SERIAL_CPM_SCC4 - clrbits32((u32*)&immap->im_scc[3].scc_sccm, UART_SCCM_TX | UART_SCCM_RX); - clrbits32((u32*)&immap->im_scc[3].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT); -#endif - - iounmap(bcsr); - iounmap(immap); -} diff --git a/arch/ppc/platforms/pq2ads.h b/arch/ppc/platforms/pq2ads.h deleted file mode 100644 index 2b287f4e0ca..00000000000 --- a/arch/ppc/platforms/pq2ads.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * A collection of structures, addresses, and values associated with - * the Motorola MPC8260ADS/MPC8266ADS-PCI boards. - * Copied from the RPX-Classic and SBS8260 stuff. - * - * Copyright (c) 2001 Dan Malek (dan@mvista.com) - */ -#ifdef __KERNEL__ -#ifndef __MACH_ADS8260_DEFS -#define __MACH_ADS8260_DEFS - - -#include <asm/ppcboot.h> - -#if defined(CONFIG_ADS8272) -#define BOARD_CHIP_NAME "8272" -#endif - -/* Memory map is configured by the PROM startup. - * We just map a few things we need. The CSR is actually 4 byte-wide - * registers that can be accessed as 8-, 16-, or 32-bit values. - */ -#define CPM_MAP_ADDR ((uint)0xf0000000) -#define BCSR_ADDR ((uint)0xf4500000) -#define BCSR_SIZE ((uint)(32 * 1024)) - -#define BOOTROM_RESTART_ADDR ((uint)0xff000104) - -/* For our show_cpuinfo hooks. */ -#define CPUINFO_VENDOR "Motorola" -#define CPUINFO_MACHINE "PQ2 ADS PowerPC" - -/* The ADS8260 has 16, 32-bit wide control/status registers, accessed - * only on word boundaries. - * Not all are used (yet), or are interesting to us (yet). - */ - -/* Things of interest in the CSR. -*/ -#define BCSR0_LED0 ((uint)0x02000000) /* 0 == on */ -#define BCSR0_LED1 ((uint)0x01000000) /* 0 == on */ -#define BCSR1_FETHIEN ((uint)0x08000000) /* 0 == enable */ -#define BCSR1_FETH_RST ((uint)0x04000000) /* 0 == reset */ -#define BCSR1_RS232_EN1 ((uint)0x02000000) /* 0 == enable */ -#define BCSR1_RS232_EN2 ((uint)0x01000000) /* 0 == enable */ -#define BCSR3_FETHIEN2 ((uint)0x10000000) /* 0 == enable */ -#define BCSR3_FETH2_RST ((uint)0x80000000) /* 0 == reset */ - -#define PHY_INTERRUPT SIU_INT_IRQ7 - -#ifdef CONFIG_PCI -/* PCI interrupt controller */ -#define PCI_INT_STAT_REG 0xF8200000 -#define PCI_INT_MASK_REG 0xF8200004 -#define PIRQA (NR_CPM_INTS + 0) -#define PIRQB (NR_CPM_INTS + 1) -#define PIRQC (NR_CPM_INTS + 2) -#define PIRQD (NR_CPM_INTS + 3) - -/* - * PCI memory map definitions for MPC8266ADS-PCI. - * - * processor view - * local address PCI address target - * 0x80000000-0x9FFFFFFF 0x80000000-0x9FFFFFFF PCI mem with prefetch - * 0xA0000000-0xBFFFFFFF 0xA0000000-0xBFFFFFFF PCI mem w/o prefetch - * 0xF4000000-0xF7FFFFFF 0x00000000-0x03FFFFFF PCI IO - * - * PCI master view - * local address PCI address target - * 0x00000000-0x1FFFFFFF 0x00000000-0x1FFFFFFF MPC8266 local memory - */ - -/* All the other PCI memory map definitions reside at syslib/m82xx_pci.h - Here we should redefine what is unique for this board */ -#define M82xx_PCI_SLAVE_MEM_LOCAL 0x00000000 /* Local base */ -#define M82xx_PCI_SLAVE_MEM_BUS 0x00000000 /* PCI base */ -#define M82xx_PCI_SLAVE_MEM_SIZE 0x10000000 /* 256 Mb */ - -#define M82xx_PCI_SLAVE_SEC_WND_SIZE ~(0x40000000 - 1U) /* 2 x 512Mb */ -#define M82xx_PCI_SLAVE_SEC_WND_BASE 0x80000000 /* PCI Memory base */ - -#if defined(CONFIG_ADS8272) -#define PCI_INT_TO_SIU SIU_INT_IRQ2 -#elif defined(CONFIG_PQ2FADS) -#define PCI_INT_TO_SIU SIU_INT_IRQ6 -#else -#warning PCI Bridge will be without interrupts support -#endif - -#endif /* CONFIG_PCI */ - -#endif /* __MACH_ADS8260_DEFS */ -#endif /* __KERNEL__ */ diff --git a/arch/ppc/platforms/pq2ads_pd.h b/arch/ppc/platforms/pq2ads_pd.h deleted file mode 100644 index 672483df807..00000000000 --- a/arch/ppc/platforms/pq2ads_pd.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef __PQ2ADS_PD_H -#define __PQ2ADS_PD_H -/* - * arch/ppc/platforms/82xx/pq2ads_pd.h - * - * Some defines for MPC82xx board-specific PlatformDevice descriptions - * - * 2005 (c) MontaVista Software, Inc. - * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -/* FCC1 Clock Source Configuration. These can be redefined in the board specific file. - Can only choose from CLK9-12 */ - -#define F1_RXCLK 11 -#define F1_TXCLK 10 - -/* FCC2 Clock Source Configuration. These can be redefined in the board specific file. - Can only choose from CLK13-16 */ -#define F2_RXCLK 15 -#define F2_TXCLK 16 - -/* FCC3 Clock Source Configuration. These can be redefined in the board specific file. - Can only choose from CLK13-16 */ -#define F3_RXCLK 13 -#define F3_TXCLK 14 - -#endif diff --git a/arch/ppc/syslib/m8260_setup.c b/arch/ppc/syslib/m8260_setup.c index 46588fa9438..b40583724de 100644 --- a/arch/ppc/syslib/m8260_setup.c +++ b/arch/ppc/syslib/m8260_setup.c @@ -175,12 +175,6 @@ m8260_init_IRQ(void) * in case the boot rom changed something on us. */ cpm2_immr->im_intctl.ic_siprr = 0x05309770; - -#if defined(CONFIG_PCI) && (defined(CONFIG_ADS8272) || defined(CONFIG_PQ2FADS)) - /* Initialize stuff for the 82xx CPLD IC and install demux */ - pq2pci_init_irq(); -#endif - } /* diff --git a/arch/ppc/syslib/m82xx_pci.c b/arch/ppc/syslib/m82xx_pci.c index fe860d52e2e..657a1c25a2a 100644 --- a/arch/ppc/syslib/m82xx_pci.c +++ b/arch/ppc/syslib/m82xx_pci.c @@ -150,14 +150,6 @@ pq2pci_init_irq(void) { int irq; volatile cpm2_map_t *immap = cpm2_immr; -#if defined CONFIG_ADS8272 - /* configure chip select for PCI interrupt controller */ - immap->im_memctl.memc_br3 = PCI_INT_STAT_REG | 0x00001801; - immap->im_memctl.memc_or3 = 0xffff8010; -#elif defined CONFIG_PQ2FADS - immap->im_memctl.memc_br8 = PCI_INT_STAT_REG | 0x00001801; - immap->im_memctl.memc_or8 = 0xffff8010; -#endif for (irq = NR_CPM_INTS; irq < NR_CPM_INTS + 4; irq++) irq_desc[irq].chip = &pq2pci_ic; @@ -222,26 +214,6 @@ pq2ads_setup_pci(struct pci_controller *hose) immap->im_memctl.memc_pcibr1 = M82xx_PCI_SEC_WND_BASE | PCIBR_ENABLE; #endif -#if defined CONFIG_ADS8272 - immap->im_siu_conf.siu_82xx.sc_siumcr = - (immap->im_siu_conf.siu_82xx.sc_siumcr & - ~(SIUMCR_BBD | SIUMCR_ESE | SIUMCR_PBSE | - SIUMCR_CDIS | SIUMCR_DPPC11 | SIUMCR_L2CPC11 | - SIUMCR_LBPC11 | SIUMCR_APPC11 | - SIUMCR_CS10PC11 | SIUMCR_BCTLC11 | SIUMCR_MMR11)) | - SIUMCR_DPPC11 | SIUMCR_L2CPC01 | SIUMCR_LBPC00 | - SIUMCR_APPC10 | SIUMCR_CS10PC00 | - SIUMCR_BCTLC00 | SIUMCR_MMR11 ; - -#elif defined CONFIG_PQ2FADS - /* - * Setting required to enable IRQ1-IRQ7 (SIUMCR [DPPC]), - * and local bus for PCI (SIUMCR [LBPC]). - */ - immap->im_siu_conf.siu_82xx.sc_siumcr = (immap->im_siu_conf.siu_82xx.sc_siumcr & - ~(SIUMCR_L2CPC11 | SIUMCR_LBPC11 | SIUMCR_CS10PC11 | SIUMCR_APPC11) | - SIUMCR_BBD | SIUMCR_LBPC01 | SIUMCR_DPPC11 | SIUMCR_APPC10); -#endif /* Enable PCI */ immap->im_pci.pci_gcr = cpu_to_le32(PCIGCR_PCI_BUS_EN); @@ -284,12 +256,6 @@ pq2ads_setup_pci(struct pci_controller *hose) immap->im_pci.pci_pibar0 = cpu_to_le32(M82xx_PCI_SLAVE_MEM_BUS >> PITA_ADDR_SHIFT); immap->im_pci.pci_pitar0 = cpu_to_le32(M82xx_PCI_SLAVE_MEM_LOCAL>> PITA_ADDR_SHIFT); -#if defined CONFIG_ADS8272 - /* PCI int highest prio */ - immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x01236745; -#elif defined CONFIG_PQ2FADS - immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x03124567; -#endif /* park bus on PCI */ immap->im_siu_conf.siu_82xx.sc_ppc_acr = PPC_ACR_BUS_PARK_PCI; @@ -320,10 +286,6 @@ void __init pq2_find_bridges(void) hose->bus_offset = 0; hose->last_busno = 0xff; -#ifdef CONFIG_ADS8272 - hose->set_cfg_type = 1; -#endif - setup_m8260_indirect_pci(hose, (unsigned long)&cpm2_immr->im_pci.pci_cfg_addr, (unsigned long)&cpm2_immr->im_pci.pci_cfg_data); diff --git a/arch/ppc/syslib/m8xx_setup.c b/arch/ppc/syslib/m8xx_setup.c index 19749e9bcf9..18da720fc1b 100644 --- a/arch/ppc/syslib/m8xx_setup.c +++ b/arch/ppc/syslib/m8xx_setup.c @@ -141,16 +141,6 @@ m8xx_setup_arch(void) #endif #endif -#if defined (CONFIG_MPC86XADS) || defined (CONFIG_MPC885ADS) -#if defined(CONFIG_MTD_PHYSMAP) - physmap_configure(binfo->bi_flashstart, binfo->bi_flashsize, - MPC8xxADS_BANK_WIDTH, NULL); -#ifdef CONFIG_MTD_PARTITIONS - physmap_set_partitions(mpc8xxads_partitions, mpc8xxads_part_num); -#endif /* CONFIG_MTD_PARTITIONS */ -#endif /* CONFIG_MTD_PHYSMAP */ -#endif - board_init(); } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f6a68e178fc..8f5f02160ff 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK default y depends on SMP && PREEMPT +config PGSTE + bool + default y if KVM + mainmenu "Linux Kernel Configuration" config S390 @@ -69,6 +73,7 @@ config S390 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_KVM if 64BIT source "init/Kconfig" @@ -515,6 +520,13 @@ config ZFCPDUMP Select this option if you want to build an zfcpdump enabled kernel. Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. +config S390_GUEST +bool "s390 guest support (EXPERIMENTAL)" + depends on 64BIT && EXPERIMENTAL + select VIRTIO + select VIRTIO_RING + help + Select this option if you want to run the kernel under s390 linux endmenu source "net/Kconfig" @@ -536,3 +548,5 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" + +source "arch/s390/kvm/Kconfig" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index f708be367b0..792a4e7743c 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -87,7 +87,7 @@ LDFLAGS_vmlinux := -e start head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ - arch/s390/appldata/ arch/s390/hypfs/ + arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/ diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 540a67f979b..68ec4083bf7 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -144,6 +144,10 @@ static noinline __init void detect_machine_type(void) /* Running on a P/390 ? */ if (cpuinfo->cpu_id.machine == 0x7490) machine_flags |= 4; + + /* Running under KVM ? */ + if (cpuinfo->cpu_id.version == 0xfe) + machine_flags |= 64; } #ifdef CONFIG_64BIT diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7141147e6b6..a9d18aafa5f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p) early_param("ipldelay", early_parse_ipldelay); #ifdef CONFIG_S390_SWITCH_AMODE +#ifdef CONFIG_PGSTE +unsigned int switch_amode = 1; +#else unsigned int switch_amode = 0; +#endif EXPORT_SYMBOL_GPL(switch_amode); static void set_amode_and_uaccess(unsigned long user_amode, @@ -797,9 +801,13 @@ setup_arch(char **cmdline_p) "This machine has an IEEE fpu\n" : "This machine has no IEEE fpu\n"); #else /* CONFIG_64BIT */ - printk((MACHINE_IS_VM) ? - "We are running under VM (64 bit mode)\n" : - "We are running native (64 bit mode)\n"); + if (MACHINE_IS_VM) + printk("We are running under VM (64 bit mode)\n"); + else if (MACHINE_IS_KVM) { + printk("We are running under KVM (64 bit mode)\n"); + add_preferred_console("ttyS", 1, NULL); + } else + printk("We are running native (64 bit mode)\n"); #endif /* CONFIG_64BIT */ /* Save unparsed command line copy for /proc/cmdline */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c5f05b3fb2c..ca90ee3f930 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -110,6 +110,7 @@ void account_system_vtime(struct task_struct *tsk) S390_lowcore.steal_clock -= cputime << 12; account_system_time(tsk, 0, cputime); } +EXPORT_SYMBOL_GPL(account_system_vtime); static inline void set_vtimer(__u64 expires) { diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig new file mode 100644 index 00000000000..1761b74d639 --- /dev/null +++ b/arch/s390/kvm/Kconfig @@ -0,0 +1,46 @@ +# +# KVM configuration +# +config HAVE_KVM + bool + +menuconfig VIRTUALIZATION + bool "Virtualization" + default y + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + select S390_SWITCH_AMODE + select PREEMPT + ---help--- + Support hosting paravirtualized guest machines using the SIE + virtualization capability on the mainframe. This should work + on any 64bit machine. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_TRACE + bool + +# OK, it's a little counter-intuitive to do this, but it puts it neatly under +# the virtualization menu. +source drivers/virtio/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile new file mode 100644 index 00000000000..e5221ec0b8e --- /dev/null +++ b/arch/s390/kvm/Makefile @@ -0,0 +1,14 @@ +# Makefile for kernel virtual machines on s390 +# +# Copyright IBM Corp. 2008 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License (version 2 only) +# as published by the Free Software Foundation. + +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) + +EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm + +kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o +obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c new file mode 100644 index 00000000000..f639a152869 --- /dev/null +++ b/arch/s390/kvm/diag.c @@ -0,0 +1,67 @@ +/* + * diag.c - handling diagnose instructions + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include "kvm-s390.h" + +static int __diag_time_slice_end(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); + vcpu->stat.diagnose_44++; + vcpu_put(vcpu); + schedule(); + vcpu_load(vcpu); + return 0; +} + +static int __diag_ipl_functions(struct kvm_vcpu *vcpu) +{ + unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; + unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff; + + VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); + switch (subcode) { + case 3: + vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; + break; + case 4: + vcpu->run->s390_reset_flags = 0; + break; + default: + return -ENOTSUPP; + } + + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; + vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; + vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; + vcpu->run->exit_reason = KVM_EXIT_S390_RESET; + VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx", + vcpu->run->s390_reset_flags); + return -EREMOTE; +} + +int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) +{ + int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + + switch (code) { + case 0x44: + return __diag_time_slice_end(vcpu); + case 0x308: + return __diag_ipl_functions(vcpu); + default: + return -ENOTSUPP; + } +} diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h new file mode 100644 index 00000000000..4e0633c413f --- /dev/null +++ b/arch/s390/kvm/gaccess.h @@ -0,0 +1,274 @@ +/* + * gaccess.h - access guest memory + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + */ + +#ifndef __KVM_S390_GACCESS_H +#define __KVM_S390_GACCESS_H + +#include <linux/compiler.h> +#include <linux/kvm_host.h> +#include <asm/uaccess.h> + +static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, + u64 guestaddr) +{ + u64 prefix = vcpu->arch.sie_block->prefix; + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if (guestaddr < 2 * PAGE_SIZE) + guestaddr += prefix; + else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) + guestaddr -= prefix; + + if (guestaddr > memsize) + return (void __user __force *) ERR_PTR(-EFAULT); + + guestaddr += origin; + + return (void __user *) guestaddr; +} + +static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, + u64 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 7); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (u64 __user *) uptr); +} + +static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, + u32 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 3); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (u32 __user *) uptr); +} + +static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, + u16 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 1); + + if (IS_ERR(uptr)) + return PTR_ERR(uptr); + + return get_user(*result, (u16 __user *) uptr); +} + +static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, + u8 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (u8 __user *) uptr); +} + +static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, + u64 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 7); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u64 __user *) uptr); +} + +static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, + u32 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 3); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u32 __user *) uptr); +} + +static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, + u16 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 1); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u16 __user *) uptr); +} + +static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, + u8 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u8 __user *) uptr); +} + + +static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest, + const void *from, unsigned long n) +{ + int rc; + unsigned long i; + const u8 *data = from; + + for (i = 0; i < n; i++) { + rc = put_guest_u8(vcpu, guestdest++, *(data++)); + if (rc < 0) + return rc; + } + return 0; +} + +static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest, + const void *from, unsigned long n) +{ + u64 prefix = vcpu->arch.sie_block->prefix; + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) + goto slowpath; + + if ((guestdest < prefix) && (guestdest + n > prefix)) + goto slowpath; + + if ((guestdest < prefix + 2 * PAGE_SIZE) + && (guestdest + n > prefix + 2 * PAGE_SIZE)) + goto slowpath; + + if (guestdest < 2 * PAGE_SIZE) + guestdest += prefix; + else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) + guestdest -= prefix; + + if (guestdest + n > memsize) + return -EFAULT; + + if (guestdest + n < guestdest) + return -EFAULT; + + guestdest += origin; + + return copy_to_user((void __user *) guestdest, from, n); +slowpath: + return __copy_to_guest_slow(vcpu, guestdest, from, n); +} + +static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, + u64 guestsrc, unsigned long n) +{ + int rc; + unsigned long i; + u8 *data = to; + + for (i = 0; i < n; i++) { + rc = get_guest_u8(vcpu, guestsrc++, data++); + if (rc < 0) + return rc; + } + return 0; +} + +static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, + u64 guestsrc, unsigned long n) +{ + u64 prefix = vcpu->arch.sie_block->prefix; + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) + goto slowpath; + + if ((guestsrc < prefix) && (guestsrc + n > prefix)) + goto slowpath; + + if ((guestsrc < prefix + 2 * PAGE_SIZE) + && (guestsrc + n > prefix + 2 * PAGE_SIZE)) + goto slowpath; + + if (guestsrc < 2 * PAGE_SIZE) + guestsrc += prefix; + else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE)) + guestsrc -= prefix; + + if (guestsrc + n > memsize) + return -EFAULT; + + if (guestsrc + n < guestsrc) + return -EFAULT; + + guestsrc += origin; + + return copy_from_user(to, (void __user *) guestsrc, n); +slowpath: + return __copy_from_guest_slow(vcpu, to, guestsrc, n); +} + +static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest, + const void *from, unsigned long n) +{ + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if (guestdest + n > memsize) + return -EFAULT; + + if (guestdest + n < guestdest) + return -EFAULT; + + guestdest += origin; + + return copy_to_user((void __user *) guestdest, from, n); +} + +static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, + u64 guestsrc, unsigned long n) +{ + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if (guestsrc + n > memsize) + return -EFAULT; + + if (guestsrc + n < guestsrc) + return -EFAULT; + + guestsrc += origin; + + return copy_from_user(to, (void __user *) guestsrc, n); +} +#endif diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c new file mode 100644 index 00000000000..349581a2610 --- /dev/null +++ b/arch/s390/kvm/intercept.c @@ -0,0 +1,216 @@ +/* + * intercept.c - in-kernel handling for sie intercepts + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/errno.h> +#include <linux/pagemap.h> + +#include <asm/kvm_host.h> + +#include "kvm-s390.h" +#include "gaccess.h" + +static int handle_lctg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + + ((vcpu->arch.sie_block->ipb & 0xff00) << 4); + u64 useraddr; + int reg, rc; + + vcpu->stat.instruction_lctg++; + if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f) + return -ENOTSUPP; + + useraddr = disp2; + if (base2) + useraddr += vcpu->arch.guest_gprs[base2]; + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, + disp2); + + do { + rc = get_guest_u64(vcpu, useraddr, + &vcpu->arch.sie_block->gcr[reg]); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + break; + } + useraddr += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + return 0; +} + +static int handle_lctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 useraddr; + u32 val = 0; + int reg, rc; + + vcpu->stat.instruction_lctl++; + + useraddr = disp2; + if (base2) + useraddr += vcpu->arch.guest_gprs[base2]; + + VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, + disp2); + + reg = reg1; + do { + rc = get_guest_u32(vcpu, useraddr, &val); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + break; + } + vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; + vcpu->arch.sie_block->gcr[reg] |= val; + useraddr += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + return 0; +} + +static intercept_handler_t instruction_handlers[256] = { + [0x83] = kvm_s390_handle_diag, + [0xae] = kvm_s390_handle_sigp, + [0xb2] = kvm_s390_handle_priv, + [0xb7] = handle_lctl, + [0xeb] = handle_lctg, +}; + +static int handle_noop(struct kvm_vcpu *vcpu) +{ + switch (vcpu->arch.sie_block->icptcode) { + case 0x10: + vcpu->stat.exit_external_request++; + break; + case 0x14: + vcpu->stat.exit_external_interrupt++; + break; + default: + break; /* nothing */ + } + return 0; +} + +static int handle_stop(struct kvm_vcpu *vcpu) +{ + int rc; + + vcpu->stat.exit_stop_request++; + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + spin_lock_bh(&vcpu->arch.local_int.lock); + if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { + vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; + rc = __kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_NOADDR); + if (rc >= 0) + rc = -ENOTSUPP; + } + + if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { + vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; + VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); + rc = -ENOTSUPP; + } else + rc = 0; + spin_unlock_bh(&vcpu->arch.local_int.lock); + return rc; +} + +static int handle_validity(struct kvm_vcpu *vcpu) +{ + int viwhy = vcpu->arch.sie_block->ipb >> 16; + vcpu->stat.exit_validity++; + if (viwhy == 0x37) { + fault_in_pages_writeable((char __user *) + vcpu->kvm->arch.guest_origin + + vcpu->arch.sie_block->prefix, + PAGE_SIZE); + return 0; + } + VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", + viwhy); + return -ENOTSUPP; +} + +static int handle_instruction(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + vcpu->stat.exit_instruction++; + handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; + if (handler) + return handler(vcpu); + return -ENOTSUPP; +} + +static int handle_prog(struct kvm_vcpu *vcpu) +{ + vcpu->stat.exit_program_interruption++; + return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc); +} + +static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) +{ + int rc, rc2; + + vcpu->stat.exit_instr_and_program++; + rc = handle_instruction(vcpu); + rc2 = handle_prog(vcpu); + + if (rc == -ENOTSUPP) + vcpu->arch.sie_block->icptcode = 0x04; + if (rc) + return rc; + return rc2; +} + +static const intercept_handler_t intercept_funcs[0x48 >> 2] = { + [0x00 >> 2] = handle_noop, + [0x04 >> 2] = handle_instruction, + [0x08 >> 2] = handle_prog, + [0x0C >> 2] = handle_instruction_and_prog, + [0x10 >> 2] = handle_noop, + [0x14 >> 2] = handle_noop, + [0x1C >> 2] = kvm_s390_handle_wait, + [0x20 >> 2] = handle_validity, + [0x28 >> 2] = handle_stop, +}; + +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) +{ + intercept_handler_t func; + u8 code = vcpu->arch.sie_block->icptcode; + + if (code & 3 || code > 0x48) + return -ENOTSUPP; + func = intercept_funcs[code >> 2]; + if (func) + return func(vcpu); + return -ENOTSUPP; +} diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c new file mode 100644 index 00000000000..fcd1ed8015c --- /dev/null +++ b/arch/s390/kvm/interrupt.c @@ -0,0 +1,592 @@ +/* + * interrupt.c - handling kvm guest interrupts + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + */ + +#include <asm/lowcore.h> +#include <asm/uaccess.h> +#include <linux/kvm_host.h> +#include "kvm-s390.h" +#include "gaccess.h" + +static int psw_extint_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); +} + +static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) +{ + if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) || + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT)) + return 0; + return 1; +} + +static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, + struct interrupt_info *inti) +{ + switch (inti->type) { + case KVM_S390_INT_EMERGENCY: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x4000ul) + return 1; + return 0; + case KVM_S390_INT_SERVICE: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x200ul) + return 1; + return 0; + case KVM_S390_INT_VIRTIO: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x200ul) + return 1; + return 0; + case KVM_S390_PROGRAM_INT: + case KVM_S390_SIGP_STOP: + case KVM_S390_SIGP_SET_PREFIX: + case KVM_S390_RESTART: + return 1; + default: + BUG(); + } + return 0; +} + +static void __set_cpu_idle(struct kvm_vcpu *vcpu) +{ + BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); + atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); +} + +static void __unset_cpu_idle(struct kvm_vcpu *vcpu) +{ + BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); + atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); +} + +static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(CPUSTAT_ECALL_PEND | + CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, + &vcpu->arch.sie_block->cpuflags); + vcpu->arch.sie_block->lctl = 0x0000; +} + +static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) +{ + atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); +} + +static void __set_intercept_indicator(struct kvm_vcpu *vcpu, + struct interrupt_info *inti) +{ + switch (inti->type) { + case KVM_S390_INT_EMERGENCY: + case KVM_S390_INT_SERVICE: + case KVM_S390_INT_VIRTIO: + if (psw_extint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_EXT_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR0; + break; + case KVM_S390_SIGP_STOP: + __set_cpuflag(vcpu, CPUSTAT_STOP_INT); + break; + default: + BUG(); + } +} + +static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, + struct interrupt_info *inti) +{ + const unsigned short table[] = { 2, 4, 4, 6 }; + int rc, exception = 0; + + switch (inti->type) { + case KVM_S390_INT_EMERGENCY: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); + vcpu->stat.deliver_emergency_signal++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_INT_SERVICE: + VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", + inti->ext.ext_params); + vcpu->stat.deliver_service_signal++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_INT_VIRTIO: + VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx", + inti->ext.ext_params, inti->ext.ext_params2); + vcpu->stat.deliver_virtio_interrupt++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM, + inti->ext.ext_params2); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_SIGP_STOP: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); + vcpu->stat.deliver_stop_signal++; + __set_intercept_indicator(vcpu, inti); + break; + + case KVM_S390_SIGP_SET_PREFIX: + VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", + inti->prefix.address); + vcpu->stat.deliver_prefix_signal++; + vcpu->arch.sie_block->prefix = inti->prefix.address; + vcpu->arch.sie_block->ihcpu = 0xffff; + break; + + case KVM_S390_RESTART: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); + vcpu->stat.deliver_restart_signal++; + rc = copy_to_guest(vcpu, offsetof(struct _lowcore, + restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_PROGRAM_INT: + VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", + inti->pgm.code, + table[vcpu->arch.sie_block->ipa >> 14]); + vcpu->stat.deliver_program_int++; + rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_PGM_ILC, + table[vcpu->arch.sie_block->ipa >> 14]); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_PGM_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + default: + BUG(); + } + + if (exception) { + VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" + " interrupt"); + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (inti->type == KVM_S390_PROGRAM_INT) { + printk(KERN_WARNING "kvm: recursive program check\n"); + BUG(); + } + } +} + +static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) +{ + int rc, exception = 0; + + if (psw_extint_disabled(vcpu)) + return 0; + if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) + return 0; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); + if (rc == -EFAULT) + exception = 1; + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + if (exception) { + VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \ + " ckc interrupt"); + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + return 0; + } + + return 1; +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct local_interrupt *li = &vcpu->arch.local_int; + struct float_interrupt *fi = vcpu->arch.local_int.float_int; + struct interrupt_info *inti; + int rc = 0; + + if (atomic_read(&li->active)) { + spin_lock_bh(&li->lock); + list_for_each_entry(inti, &li->list, list) + if (__interrupt_is_deliverable(vcpu, inti)) { + rc = 1; + break; + } + spin_unlock_bh(&li->lock); + } + + if ((!rc) && atomic_read(&fi->active)) { + spin_lock_bh(&fi->lock); + list_for_each_entry(inti, &fi->list, list) + if (__interrupt_is_deliverable(vcpu, inti)) { + rc = 1; + break; + } + spin_unlock_bh(&fi->lock); + } + + if ((!rc) && (vcpu->arch.sie_block->ckc < + get_clock() + vcpu->arch.sie_block->epoch)) { + if ((!psw_extint_disabled(vcpu)) && + (vcpu->arch.sie_block->gcr[0] & 0x800ul)) + rc = 1; + } + + return rc; +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) +{ + u64 now, sltime; + DECLARE_WAITQUEUE(wait, current); + + vcpu->stat.exit_wait_state++; + if (kvm_cpu_has_interrupt(vcpu)) + return 0; + + if (psw_interrupts_disabled(vcpu)) { + VCPU_EVENT(vcpu, 3, "%s", "disabled wait"); + __unset_cpu_idle(vcpu); + return -ENOTSUPP; /* disabled wait */ + } + + if (psw_extint_disabled(vcpu) || + (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) { + VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); + goto no_timer; + } + + now = get_clock() + vcpu->arch.sie_block->epoch; + if (vcpu->arch.sie_block->ckc < now) { + __unset_cpu_idle(vcpu); + return 0; + } + + sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1; + + vcpu->arch.ckc_timer.expires = jiffies + sltime; + + add_timer(&vcpu->arch.ckc_timer); + VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime); +no_timer: + spin_lock_bh(&vcpu->arch.local_int.float_int->lock); + spin_lock_bh(&vcpu->arch.local_int.lock); + __set_cpu_idle(vcpu); + vcpu->arch.local_int.timer_due = 0; + add_wait_queue(&vcpu->arch.local_int.wq, &wait); + while (list_empty(&vcpu->arch.local_int.list) && + list_empty(&vcpu->arch.local_int.float_int->list) && + (!vcpu->arch.local_int.timer_due) && + !signal_pending(current)) { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); + vcpu_put(vcpu); + schedule(); + vcpu_load(vcpu); + spin_lock_bh(&vcpu->arch.local_int.float_int->lock); + spin_lock_bh(&vcpu->arch.local_int.lock); + } + __unset_cpu_idle(vcpu); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&vcpu->wq, &wait); + spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); + del_timer(&vcpu->arch.ckc_timer); + return 0; +} + +void kvm_s390_idle_wakeup(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + spin_lock_bh(&vcpu->arch.local_int.lock); + vcpu->arch.local_int.timer_due = 1; + if (waitqueue_active(&vcpu->arch.local_int.wq)) + wake_up_interruptible(&vcpu->arch.local_int.wq); + spin_unlock_bh(&vcpu->arch.local_int.lock); +} + + +void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) +{ + struct local_interrupt *li = &vcpu->arch.local_int; + struct float_interrupt *fi = vcpu->arch.local_int.float_int; + struct interrupt_info *n, *inti = NULL; + int deliver; + + __reset_intercept_indicators(vcpu); + if (atomic_read(&li->active)) { + do { + deliver = 0; + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + if (__interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&li->list)) + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } + + if ((vcpu->arch.sie_block->ckc < + get_clock() + vcpu->arch.sie_block->epoch)) + __try_deliver_ckc_interrupt(vcpu); + + if (atomic_read(&fi->active)) { + do { + deliver = 0; + spin_lock_bh(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + if (__interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock_bh(&fi->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } +} + +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +{ + struct local_interrupt *li = &vcpu->arch.local_int; + struct interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_PROGRAM_INT;; + inti->pgm.code = code; + + VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); + spin_lock_bh(&li->lock); + list_add(&inti->list, &li->list); + atomic_set(&li->active, 1); + BUG_ON(waitqueue_active(&li->wq)); + spin_unlock_bh(&li->lock); + return 0; +} + +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int) +{ + struct local_interrupt *li; + struct float_interrupt *fi; + struct interrupt_info *inti; + int sigcpu; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + switch (s390int->type) { + case KVM_S390_INT_VIRTIO: + VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx", + s390int->parm, s390int->parm64); + inti->type = s390int->type; + inti->ext.ext_params = s390int->parm; + inti->ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_INT_SERVICE: + VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); + inti->type = s390int->type; + inti->ext.ext_params = s390int->parm; + break; + case KVM_S390_PROGRAM_INT: + case KVM_S390_SIGP_STOP: + case KVM_S390_INT_EMERGENCY: + default: + kfree(inti); + return -EINVAL; + } + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock_bh(&fi->lock); + list_add_tail(&inti->list, &fi->list); + atomic_set(&fi->active, 1); + sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); + if (sigcpu == KVM_MAX_VCPUS) { + do { + sigcpu = fi->next_rr_cpu++; + if (sigcpu == KVM_MAX_VCPUS) + sigcpu = fi->next_rr_cpu = 0; + } while (fi->local_int[sigcpu] == NULL); + } + li = fi->local_int[sigcpu]; + spin_lock_bh(&li->lock); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + spin_unlock_bh(&fi->lock); + mutex_unlock(&kvm->lock); + return 0; +} + +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int) +{ + struct local_interrupt *li; + struct interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + switch (s390int->type) { + case KVM_S390_PROGRAM_INT: + if (s390int->parm & 0xffff0000) { + kfree(inti); + return -EINVAL; + } + inti->type = s390int->type; + inti->pgm.code = s390int->parm; + VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", + s390int->parm); + break; + case KVM_S390_SIGP_STOP: + case KVM_S390_RESTART: + case KVM_S390_SIGP_SET_PREFIX: + case KVM_S390_INT_EMERGENCY: + VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); + inti->type = s390int->type; + break; + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + default: + kfree(inti); + return -EINVAL; + } + + mutex_lock(&vcpu->kvm->lock); + li = &vcpu->arch.local_int; + spin_lock_bh(&li->lock); + if (inti->type == KVM_S390_PROGRAM_INT) + list_add(&inti->list, &li->list); + else + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + if (inti->type == KVM_S390_SIGP_STOP) + li->action_bits |= ACTION_STOP_ON_STOP; + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&vcpu->arch.local_int.wq); + spin_unlock_bh(&li->lock); + mutex_unlock(&vcpu->kvm->lock); + return 0; +} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c new file mode 100644 index 00000000000..98d1e73e01f --- /dev/null +++ b/arch/s390/kvm/kvm-s390.c @@ -0,0 +1,685 @@ +/* + * s390host.c -- hosting zSeries kernel virtual machines + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + * Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/compiler.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <asm/lowcore.h> +#include <asm/pgtable.h> + +#include "kvm-s390.h" +#include "gaccess.h" + +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { "userspace_handled", VCPU_STAT(exit_userspace) }, + { "exit_validity", VCPU_STAT(exit_validity) }, + { "exit_stop_request", VCPU_STAT(exit_stop_request) }, + { "exit_external_request", VCPU_STAT(exit_external_request) }, + { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, + { "exit_instruction", VCPU_STAT(exit_instruction) }, + { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, + { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "instruction_lctg", VCPU_STAT(instruction_lctg) }, + { "instruction_lctl", VCPU_STAT(instruction_lctl) }, + { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, + { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, + { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, + { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, + { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, + { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, + { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, + { "exit_wait_state", VCPU_STAT(exit_wait_state) }, + { "instruction_stidp", VCPU_STAT(instruction_stidp) }, + { "instruction_spx", VCPU_STAT(instruction_spx) }, + { "instruction_stpx", VCPU_STAT(instruction_stpx) }, + { "instruction_stap", VCPU_STAT(instruction_stap) }, + { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, + { "instruction_stsch", VCPU_STAT(instruction_stsch) }, + { "instruction_chsc", VCPU_STAT(instruction_chsc) }, + { "instruction_stsi", VCPU_STAT(instruction_stsi) }, + { "instruction_stfl", VCPU_STAT(instruction_stfl) }, + { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, + { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, + { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, + { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, + { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "diagnose_44", VCPU_STAT(diagnose_44) }, + { NULL } +}; + + +/* Section: not file related */ +void kvm_arch_hardware_enable(void *garbage) +{ + /* every s390 is virtualization enabled ;-) */ +} + +void kvm_arch_hardware_disable(void *garbage) +{ +} + +void decache_vcpus_on_cpu(int cpu) +{ +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +void kvm_arch_check_processor_compat(void *rtn) +{ +} + +int kvm_arch_init(void *opaque) +{ + return 0; +} + +void kvm_arch_exit(void) +{ +} + +/* Section: device related */ +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + if (ioctl == KVM_S390_ENABLE_SIE) + return s390_enable_sie(); + return -EINVAL; +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + return 0; +} + +/* Section: vm related */ +/* + * Get (and clear) the dirty memory log for a memory slot. + */ +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + return 0; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + int r; + + switch (ioctl) { + case KVM_S390_INTERRUPT: { + struct kvm_s390_interrupt s390int; + + r = -EFAULT; + if (copy_from_user(&s390int, argp, sizeof(s390int))) + break; + r = kvm_s390_inject_vm(kvm, &s390int); + break; + } + default: + r = -EINVAL; + } + + return r; +} + +struct kvm *kvm_arch_create_vm(void) +{ + struct kvm *kvm; + int rc; + char debug_name[16]; + + rc = s390_enable_sie(); + if (rc) + goto out_nokvm; + + rc = -ENOMEM; + kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); + if (!kvm) + goto out_nokvm; + + kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + if (!kvm->arch.sca) + goto out_nosca; + + sprintf(debug_name, "kvm-%u", current->pid); + + kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); + if (!kvm->arch.dbf) + goto out_nodbf; + + spin_lock_init(&kvm->arch.float_int.lock); + INIT_LIST_HEAD(&kvm->arch.float_int.list); + + debug_register_view(kvm->arch.dbf, &debug_sprintf_view); + VM_EVENT(kvm, 3, "%s", "vm created"); + + try_module_get(THIS_MODULE); + + return kvm; +out_nodbf: + free_page((unsigned long)(kvm->arch.sca)); +out_nosca: + kfree(kvm); +out_nokvm: + return ERR_PTR(rc); +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + debug_unregister(kvm->arch.dbf); + free_page((unsigned long)(kvm->arch.sca)); + kfree(kvm); + module_put(THIS_MODULE); +} + +/* Section: vcpu related */ +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + /* kvm common code refers to this, but does'nt call it */ + BUG(); +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + save_fp_regs(&vcpu->arch.host_fpregs); + save_access_regs(vcpu->arch.host_acrs); + vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; + restore_fp_regs(&vcpu->arch.guest_fpregs); + restore_access_regs(vcpu->arch.guest_acrs); + + if (signal_pending(current)) + atomic_set_mask(CPUSTAT_STOP_INT, + &vcpu->arch.sie_block->cpuflags); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + save_fp_regs(&vcpu->arch.guest_fpregs); + save_access_regs(vcpu->arch.guest_acrs); + restore_fp_regs(&vcpu->arch.host_fpregs); + restore_access_regs(vcpu->arch.host_acrs); +} + +static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) +{ + /* this equals initial cpu reset in pop, but we don't switch to ESA */ + vcpu->arch.sie_block->gpsw.mask = 0UL; + vcpu->arch.sie_block->gpsw.addr = 0UL; + vcpu->arch.sie_block->prefix = 0UL; + vcpu->arch.sie_block->ihcpu = 0xffff; + vcpu->arch.sie_block->cputm = 0UL; + vcpu->arch.sie_block->ckc = 0UL; + vcpu->arch.sie_block->todpr = 0; + memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); + vcpu->arch.sie_block->gcr[0] = 0xE0UL; + vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; + vcpu->arch.guest_fpregs.fpc = 0; + asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); + vcpu->arch.sie_block->gbea = 1; +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); + vcpu->arch.sie_block->gmslm = 0xffffffffffUL; + vcpu->arch.sie_block->gmsor = 0x000000000000; + vcpu->arch.sie_block->ecb = 2; + vcpu->arch.sie_block->eca = 0xC1002001U; + setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, + (unsigned long) vcpu); + get_cpu_id(&vcpu->arch.cpu_id); + vcpu->arch.cpu_id.version = 0xfe; + return 0; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, + unsigned int id) +{ + struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); + int rc = -ENOMEM; + + if (!vcpu) + goto out_nomem; + + vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL); + + if (!vcpu->arch.sie_block) + goto out_free_cpu; + + vcpu->arch.sie_block->icpua = id; + BUG_ON(!kvm->arch.sca); + BUG_ON(kvm->arch.sca->cpu[id].sda); + kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; + + spin_lock_init(&vcpu->arch.local_int.lock); + INIT_LIST_HEAD(&vcpu->arch.local_int.list); + vcpu->arch.local_int.float_int = &kvm->arch.float_int; + spin_lock_bh(&kvm->arch.float_int.lock); + kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; + init_waitqueue_head(&vcpu->arch.local_int.wq); + vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; + spin_unlock_bh(&kvm->arch.float_int.lock); + + rc = kvm_vcpu_init(vcpu, kvm, id); + if (rc) + goto out_free_cpu; + VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + vcpu->arch.sie_block); + + try_module_get(THIS_MODULE); + + return vcpu; +out_free_cpu: + kfree(vcpu); +out_nomem: + return ERR_PTR(rc); +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 3, "%s", "destroy cpu"); + free_page((unsigned long)(vcpu->arch.sie_block)); + kfree(vcpu); + module_put(THIS_MODULE); +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + /* kvm common code refers to this, but never calls it */ + BUG(); + return 0; +} + +static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) +{ + vcpu_load(vcpu); + kvm_s390_vcpu_initial_reset(vcpu); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + vcpu_load(vcpu); + memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); + memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + vcpu_load(vcpu); + memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); + memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_load(vcpu); + memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); + vcpu->arch.guest_fpregs.fpc = fpu->fpc; + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_load(vcpu); + memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); + fpu->fpc = vcpu->arch.guest_fpregs.fpc; + vcpu_put(vcpu); + return 0; +} + +static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) +{ + int rc = 0; + + vcpu_load(vcpu); + if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) + rc = -EBUSY; + else + vcpu->arch.sie_block->gpsw = psw; + vcpu_put(vcpu); + return rc; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + +static void __vcpu_run(struct kvm_vcpu *vcpu) +{ + memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); + + if (need_resched()) + schedule(); + + vcpu->arch.sie_block->icptcode = 0; + local_irq_disable(); + kvm_guest_enter(); + local_irq_enable(); + VCPU_EVENT(vcpu, 6, "entering sie flags %x", + atomic_read(&vcpu->arch.sie_block->cpuflags)); + sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs); + VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", + vcpu->arch.sie_block->icptcode); + local_irq_disable(); + kvm_guest_exit(); + local_irq_enable(); + + memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16); +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int rc; + sigset_t sigsaved; + + vcpu_load(vcpu); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + + BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); + + switch (kvm_run->exit_reason) { + case KVM_EXIT_S390_SIEIC: + vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask; + vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr; + break; + case KVM_EXIT_UNKNOWN: + case KVM_EXIT_S390_RESET: + break; + default: + BUG(); + } + + might_sleep(); + + do { + kvm_s390_deliver_pending_interrupts(vcpu); + __vcpu_run(vcpu); + rc = kvm_handle_sie_intercept(vcpu); + } while (!signal_pending(current) && !rc); + + if (signal_pending(current) && !rc) + rc = -EINTR; + + if (rc == -ENOTSUPP) { + /* intercept cannot be handled in-kernel, prepare kvm-run */ + kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; + kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask; + kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr; + kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + rc = 0; + } + + if (rc == -EREMOTE) { + /* intercept was handled, but userspace support is needed + * kvm_run has been prepared by the handler */ + rc = 0; + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + vcpu_put(vcpu); + + vcpu->stat.exit_userspace++; + return rc; +} + +static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from, + unsigned long n, int prefix) +{ + if (prefix) + return copy_to_guest(vcpu, guestdest, from, n); + else + return copy_to_guest_absolute(vcpu, guestdest, from, n); +} + +/* + * store status at address + * we use have two special cases: + * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit + * KVM_S390_STORE_STATUS_PREFIXED: -> prefix + */ +int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + const unsigned char archmode = 1; + int prefix; + + if (addr == KVM_S390_STORE_STATUS_NOADDR) { + if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) + return -EFAULT; + addr = SAVE_AREA_BASE; + prefix = 0; + } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) { + if (copy_to_guest(vcpu, 163ul, &archmode, 1)) + return -EFAULT; + addr = SAVE_AREA_BASE; + prefix = 1; + } else + prefix = 0; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs), + vcpu->arch.guest_fpregs.fprs, 128, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs), + vcpu->arch.guest_gprs, 128, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw), + &vcpu->arch.sie_block->gpsw, 16, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg), + &vcpu->arch.sie_block->prefix, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, + addr + offsetof(struct save_area_s390x, fp_ctrl_reg), + &vcpu->arch.guest_fpregs.fpc, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg), + &vcpu->arch.sie_block->todpr, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer), + &vcpu->arch.sie_block->cputm, 8, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp), + &vcpu->arch.sie_block->ckc, 8, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs), + &vcpu->arch.guest_acrs, 64, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, + addr + offsetof(struct save_area_s390x, ctrl_regs), + &vcpu->arch.sie_block->gcr, 128, prefix)) + return -EFAULT; + return 0; +} + +static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + int rc; + + vcpu_load(vcpu); + rc = __kvm_s390_vcpu_store_status(vcpu, addr); + vcpu_put(vcpu); + return rc; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_S390_INTERRUPT: { + struct kvm_s390_interrupt s390int; + + if (copy_from_user(&s390int, argp, sizeof(s390int))) + return -EFAULT; + return kvm_s390_inject_vcpu(vcpu, &s390int); + } + case KVM_S390_STORE_STATUS: + return kvm_s390_vcpu_store_status(vcpu, arg); + case KVM_S390_SET_INITIAL_PSW: { + psw_t psw; + + if (copy_from_user(&psw, argp, sizeof(psw))) + return -EFAULT; + return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); + } + case KVM_S390_INITIAL_RESET: + return kvm_arch_vcpu_ioctl_initial_reset(vcpu); + default: + ; + } + return -EINVAL; +} + +/* Section: memory related */ +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + /* A few sanity checks. We can have exactly one memory slot which has + to start at guest virtual zero and which has to be located at a + page boundary in userland and which has to end at a page boundary. + The memory in userland is ok to be fragmented into various different + vmas. It is okay to mmap() and munmap() stuff in this slot after + doing this call at any time */ + + if (mem->slot) + return -EINVAL; + + if (mem->guest_phys_addr) + return -EINVAL; + + if (mem->userspace_addr & (PAGE_SIZE - 1)) + return -EINVAL; + + if (mem->memory_size & (PAGE_SIZE - 1)) + return -EINVAL; + + kvm->arch.guest_origin = mem->userspace_addr; + kvm->arch.guest_memsize = mem->memory_size; + + /* FIXME: we do want to interrupt running CPUs and update their memory + configuration now to avoid race conditions. But hey, changing the + memory layout while virtual CPUs are running is usually bad + programming practice. */ + + return 0; +} + +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +{ + return gfn; +} + +static int __init kvm_s390_init(void) +{ + return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); +} + +static void __exit kvm_s390_exit(void) +{ + kvm_exit(); +} + +module_init(kvm_s390_init); +module_exit(kvm_s390_exit); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h new file mode 100644 index 00000000000..3893cf12eac --- /dev/null +++ b/arch/s390/kvm/kvm-s390.h @@ -0,0 +1,64 @@ +/* + * kvm_s390.h - definition for kvm on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#ifndef ARCH_S390_KVM_S390_H +#define ARCH_S390_KVM_S390_H + +#include <linux/kvm.h> +#include <linux/kvm_host.h> + +typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); + +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); + +#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \ + d_args); \ +} while (0) + +#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \ + "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \ + d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\ + d_args); \ +} while (0) + +static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) +{ + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; +} + +int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); +void kvm_s390_idle_wakeup(unsigned long data); +void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int); +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int); +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); + +/* implemented in priv.c */ +int kvm_s390_handle_priv(struct kvm_vcpu *vcpu); + +/* implemented in sigp.c */ +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); + +/* implemented in kvm-s390.c */ +int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, + unsigned long addr); +/* implemented in diag.c */ +int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); + +#endif diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c new file mode 100644 index 00000000000..1465946325c --- /dev/null +++ b/arch/s390/kvm/priv.c @@ -0,0 +1,323 @@ +/* + * priv.c - handling privileged instructions + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/errno.h> +#include <asm/current.h> +#include <asm/debug.h> +#include <asm/ebcdic.h> +#include <asm/sysinfo.h> +#include "gaccess.h" +#include "kvm-s390.h" + +static int handle_set_prefix(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + u32 address = 0; + u8 tmp; + + vcpu->stat.instruction_spx++; + + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + /* must be word boundary */ + if (operand2 & 3) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + /* get the value */ + if (get_guest_u32(vcpu, operand2, &address)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + address = address & 0x7fffe000u; + + /* make sure that the new value is valid memory */ + if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || + (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + vcpu->arch.sie_block->prefix = address; + vcpu->arch.sie_block->ihcpu = 0xffff; + + VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); +out: + return 0; +} + +static int handle_store_prefix(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + u32 address; + + vcpu->stat.instruction_stpx++; + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + /* must be word boundary */ + if (operand2 & 3) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + address = vcpu->arch.sie_block->prefix; + address = address & 0x7fffe000u; + + /* get the value */ + if (put_guest_u32(vcpu, operand2, address)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); +out: + return 0; +} + +static int handle_store_cpu_address(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 useraddr; + int rc; + + vcpu->stat.instruction_stap++; + useraddr = disp2; + if (base2) + useraddr += vcpu->arch.guest_gprs[base2]; + + if (useraddr & 1) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr); +out: + return 0; +} + +static int handle_skey(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_storage_key++; + vcpu->arch.sie_block->gpsw.addr -= 4; + VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); + return 0; +} + +static int handle_stsch(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_stsch++; + VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3"); + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + return 0; +} + +static int handle_chsc(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_chsc++; + VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3"); + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + return 0; +} + +static unsigned int kvm_stfl(void) +{ + asm volatile( + " .insn s,0xb2b10000,0(0)\n" /* stfl */ + "0:\n" + EX_TABLE(0b, 0b)); + return S390_lowcore.stfl_fac_list; +} + +static int handle_stfl(struct kvm_vcpu *vcpu) +{ + unsigned int facility_list = kvm_stfl(); + int rc; + + vcpu->stat.instruction_stfl++; + facility_list &= ~(1UL<<24); /* no stfle */ + + rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), + &facility_list, sizeof(facility_list)); + if (rc == -EFAULT) + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + else + VCPU_EVENT(vcpu, 5, "store facility list value %x", + facility_list); + return 0; +} + +static int handle_stidp(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + int rc; + + vcpu->stat.instruction_stidp++; + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + if (operand2 & 7) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); +out: + return 0; +} + +static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + int cpus = 0; + int n; + + spin_lock_bh(&fi->lock); + for (n = 0; n < KVM_MAX_VCPUS; n++) + if (fi->local_int[n]) + cpus++; + spin_unlock_bh(&fi->lock); + + /* deal with other level 3 hypervisors */ + if (stsi(mem, 3, 2, 2) == -ENOSYS) + mem->count = 0; + if (mem->count < 8) + mem->count++; + for (n = mem->count - 1; n > 0 ; n--) + memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0])); + + mem->vm[0].cpus_total = cpus; + mem->vm[0].cpus_configured = cpus; + mem->vm[0].cpus_standby = 0; + mem->vm[0].cpus_reserved = 0; + mem->vm[0].caf = 1000; + memcpy(mem->vm[0].name, "KVMguest", 8); + ASCEBC(mem->vm[0].name, 8); + memcpy(mem->vm[0].cpi, "KVM/Linux ", 16); + ASCEBC(mem->vm[0].cpi, 16); +} + +static int handle_stsi(struct kvm_vcpu *vcpu) +{ + int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28; + int sel1 = vcpu->arch.guest_gprs[0] & 0xff; + int sel2 = vcpu->arch.guest_gprs[1] & 0xffff; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + unsigned long mem; + + vcpu->stat.instruction_stsi++; + VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); + + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + if (operand2 & 0xfff && fc > 0) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (fc) { + case 0: + vcpu->arch.guest_gprs[0] = 3 << 28; + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + return 0; + case 1: /* same handling for 1 and 2 */ + case 2: + mem = get_zeroed_page(GFP_KERNEL); + if (!mem) + goto out_fail; + if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS) + goto out_mem; + break; + case 3: + if (sel1 != 2 || sel2 != 2) + goto out_fail; + mem = get_zeroed_page(GFP_KERNEL); + if (!mem) + goto out_fail; + handle_stsi_3_2_2(vcpu, (void *) mem); + break; + default: + goto out_fail; + } + + if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out_mem; + } + free_page(mem); + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.guest_gprs[0] = 0; + return 0; +out_mem: + free_page(mem); +out_fail: + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; + return 0; +} + +static intercept_handler_t priv_handlers[256] = { + [0x02] = handle_stidp, + [0x10] = handle_set_prefix, + [0x11] = handle_store_prefix, + [0x12] = handle_store_cpu_address, + [0x29] = handle_skey, + [0x2a] = handle_skey, + [0x2b] = handle_skey, + [0x34] = handle_stsch, + [0x5f] = handle_chsc, + [0x7d] = handle_stsi, + [0xb1] = handle_stfl, +}; + +int kvm_s390_handle_priv(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) + return handler(vcpu); + return -ENOTSUPP; +} diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S new file mode 100644 index 00000000000..934fd6a885f --- /dev/null +++ b/arch/s390/kvm/sie64a.S @@ -0,0 +1,47 @@ +/* + * sie64a.S - low level sie call + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/errno.h> +#include <asm/asm-offsets.h> + +SP_R5 = 5 * 8 # offset into stackframe +SP_R6 = 6 * 8 + +/* + * sie64a calling convention: + * %r2 pointer to sie control block + * %r3 guest register save area + */ + .globl sie64a +sie64a: + lgr %r5,%r3 + stmg %r5,%r14,SP_R5(%r15) # save register on entry + lgr %r14,%r2 # pointer to sie control block + lmg %r0,%r13,0(%r3) # load guest gprs 0-13 +sie_inst: + sie 0(%r14) + lg %r14,SP_R5(%r15) + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + lghi %r2,0 + lmg %r6,%r14,SP_R6(%r15) + br %r14 + +sie_err: + lg %r14,SP_R5(%r15) + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + lghi %r2,-EFAULT + lmg %r6,%r14,SP_R6(%r15) + br %r14 + + .section __ex_table,"a" + .quad sie_inst,sie_err + .previous diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c new file mode 100644 index 00000000000..0a236acfb5f --- /dev/null +++ b/arch/s390/kvm/sigp.c @@ -0,0 +1,288 @@ +/* + * sigp.c - handlinge interprocessor communication + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include "gaccess.h" +#include "kvm-s390.h" + +/* sigp order codes */ +#define SIGP_SENSE 0x01 +#define SIGP_EXTERNAL_CALL 0x02 +#define SIGP_EMERGENCY 0x03 +#define SIGP_START 0x04 +#define SIGP_STOP 0x05 +#define SIGP_RESTART 0x06 +#define SIGP_STOP_STORE_STATUS 0x09 +#define SIGP_INITIAL_CPU_RESET 0x0b +#define SIGP_CPU_RESET 0x0c +#define SIGP_SET_PREFIX 0x0d +#define SIGP_STORE_STATUS_ADDR 0x0e +#define SIGP_SET_ARCH 0x12 + +/* cpu status bits */ +#define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL +#define SIGP_STAT_INCORRECT_STATE 0x00000200UL +#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL +#define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL +#define SIGP_STAT_STOPPED 0x00000040UL +#define SIGP_STAT_OPERATOR_INTERV 0x00000020UL +#define SIGP_STAT_CHECK_STOP 0x00000010UL +#define SIGP_STAT_INOPERATIVE 0x00000004UL +#define SIGP_STAT_INVALID_ORDER 0x00000002UL +#define SIGP_STAT_RECEIVER_CHECK 0x00000001UL + + +static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock_bh(&fi->lock); + if (fi->local_int[cpu_addr] == NULL) + rc = 3; /* not operational */ + else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_RUNNING) { + *reg &= 0xffffffff00000000UL; + rc = 1; /* status stored */ + } else { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STAT_STOPPED; + rc = 1; /* status stored */ + } + spin_unlock_bh(&fi->lock); + + VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); + return rc; +} + +static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct local_interrupt *li; + struct interrupt_info *inti; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_INT_EMERGENCY; + + spin_lock_bh(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + kfree(inti); + goto unlock; + } + spin_lock_bh(&li->lock); + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + rc = 0; /* order accepted */ +unlock: + spin_unlock_bh(&fi->lock); + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); + return rc; +} + +static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct local_interrupt *li; + struct interrupt_info *inti; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_SIGP_STOP; + + spin_lock_bh(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + kfree(inti); + goto unlock; + } + spin_lock_bh(&li->lock); + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); + if (store) + li->action_bits |= ACTION_STORE_ON_STOP; + li->action_bits |= ACTION_STOP_ON_STOP; + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + rc = 0; /* order accepted */ +unlock: + spin_unlock_bh(&fi->lock); + VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); + return rc; +} + +static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) +{ + int rc; + + switch (parameter & 0xff) { + case 0: + printk(KERN_WARNING "kvm: request to switch to ESA/390 mode" + " not supported"); + rc = 3; /* not operational */ + break; + case 1: + case 2: + rc = 0; /* order accepted */ + break; + default: + rc = -ENOTSUPP; + } + return rc; +} + +static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, + u64 *reg) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct local_interrupt *li; + struct interrupt_info *inti; + int rc; + u8 tmp; + + /* make sure that the new value is valid memory */ + address = address & 0x7fffe000u; + if ((copy_from_guest(vcpu, &tmp, + (u64) (address + vcpu->kvm->arch.guest_origin) , 1)) || + (copy_from_guest(vcpu, &tmp, (u64) (address + + vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) { + *reg |= SIGP_STAT_INVALID_PARAMETER; + return 1; /* invalid parameter */ + } + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return 2; /* busy */ + + spin_lock_bh(&fi->lock); + li = fi->local_int[cpu_addr]; + + if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) { + rc = 1; /* incorrect state */ + *reg &= SIGP_STAT_INCORRECT_STATE; + kfree(inti); + goto out_fi; + } + + spin_lock_bh(&li->lock); + /* cpu must be in stopped state */ + if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + rc = 1; /* incorrect state */ + *reg &= SIGP_STAT_INCORRECT_STATE; + kfree(inti); + goto out_li; + } + + inti->type = KVM_S390_SIGP_SET_PREFIX; + inti->prefix.address = address; + + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + rc = 0; /* order accepted */ + + VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); +out_li: + spin_unlock_bh(&li->lock); +out_fi: + spin_unlock_bh(&fi->lock); + return rc; +} + +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) +{ + int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int r3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u32 parameter; + u16 cpu_addr = vcpu->arch.guest_gprs[r3]; + u8 order_code; + int rc; + + order_code = disp2; + if (base2) + order_code += vcpu->arch.guest_gprs[base2]; + + if (r1 % 2) + parameter = vcpu->arch.guest_gprs[r1]; + else + parameter = vcpu->arch.guest_gprs[r1 + 1]; + + switch (order_code) { + case SIGP_SENSE: + vcpu->stat.instruction_sigp_sense++; + rc = __sigp_sense(vcpu, cpu_addr, + &vcpu->arch.guest_gprs[r1]); + break; + case SIGP_EMERGENCY: + vcpu->stat.instruction_sigp_emergency++; + rc = __sigp_emergency(vcpu, cpu_addr); + break; + case SIGP_STOP: + vcpu->stat.instruction_sigp_stop++; + rc = __sigp_stop(vcpu, cpu_addr, 0); + break; + case SIGP_STOP_STORE_STATUS: + vcpu->stat.instruction_sigp_stop++; + rc = __sigp_stop(vcpu, cpu_addr, 1); + break; + case SIGP_SET_ARCH: + vcpu->stat.instruction_sigp_arch++; + rc = __sigp_set_arch(vcpu, parameter); + break; + case SIGP_SET_PREFIX: + vcpu->stat.instruction_sigp_prefix++; + rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, + &vcpu->arch.guest_gprs[r1]); + break; + case SIGP_RESTART: + vcpu->stat.instruction_sigp_restart++; + /* user space must know about restart */ + default: + return -ENOTSUPP; + } + + if (rc < 0) + return rc; + + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44; + return 0; +} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index fd072013f88..5c1aea97cd1 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -30,11 +30,27 @@ #define TABLES_PER_PAGE 4 #define FRAG_MASK 15UL #define SECOND_HALVES 10UL + +void clear_table_pgstes(unsigned long *table) +{ + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); + memset(table + 256, 0, PAGE_SIZE/4); + clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); + memset(table + 768, 0, PAGE_SIZE/4); +} + #else #define ALLOC_ORDER 2 #define TABLES_PER_PAGE 2 #define FRAG_MASK 3UL #define SECOND_HALVES 2UL + +void clear_table_pgstes(unsigned long *table) +{ + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); + memset(table + 256, 0, PAGE_SIZE/2); +} + #endif unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) @@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long *table; unsigned long bits; - bits = mm->context.noexec ? 3UL : 1UL; + bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; spin_lock(&mm->page_table_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { @@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm) pgtable_page_ctor(page); page->flags &= ~FRAG_MASK; table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + if (mm->context.pgstes) + clear_table_pgstes(table); + else + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); spin_lock(&mm->page_table_lock); list_add(&page->lru, &mm->context.pgtable_list); } @@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned long bits; - bits = mm->context.noexec ? 3UL : 1UL; + bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock(&mm->page_table_lock); @@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) mm->context.noexec = 0; update_mm(mm, tsk); } + +/* + * switch on pgstes for its userspace process (for kvm) + */ +int s390_enable_sie(void) +{ + struct task_struct *tsk = current; + struct mm_struct *mm; + int rc; + + task_lock(tsk); + + rc = 0; + if (tsk->mm->context.pgstes) + goto unlock; + + rc = -EINVAL; + if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || + tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) + goto unlock; + + tsk->mm->context.pgstes = 1; /* dirty little tricks .. */ + mm = dup_mm(tsk); + tsk->mm->context.pgstes = 0; + + rc = -ENOMEM; + if (!mm) + goto unlock; + mmput(tsk->mm); + tsk->mm = tsk->active_mm = mm; + preempt_disable(); + update_mm(mm, tsk); + cpu_set(smp_processor_id(), mm->cpu_vm_mask); + preempt_enable(); + rc = 0; +unlock: + task_unlock(tsk); + return rc; +} +EXPORT_SYMBOL_GPL(s390_enable_sie); diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 53dde060736..d7df26bd1e5 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -307,15 +307,6 @@ void free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_MEMORY_HOTPLUG -void online_page(struct page *page) -{ - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalram_pages++; - num_physpages++; -} - int arch_add_memory(int nid, u64 start, u64 size) { pg_data_t *pgdat; diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c index 70c0dd22491..e7f35198ae3 100644 --- a/arch/sparc/kernel/process.c +++ b/arch/sparc/kernel/process.c @@ -357,8 +357,6 @@ void flush_thread(void) { current_thread_info()->w_saved = 0; - /* No new signal delivery by default */ - current->thread.new_signal = 0; #ifndef CONFIG_SMP if(last_task_used_math == current) { #else diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c index 3e849e8e348..3c312290c3c 100644 --- a/arch/sparc/kernel/signal.c +++ b/arch/sparc/kernel/signal.c @@ -1,5 +1,4 @@ -/* $Id: signal.c,v 1.110 2002/02/08 03:57:14 davem Exp $ - * linux/arch/sparc/kernel/signal.c +/* linux/arch/sparc/kernel/signal.c * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -32,37 +31,7 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr, void *fpqueue, unsigned long *fpqdepth); extern void fpload(unsigned long *fpregs, unsigned long *fsr); -/* Signal frames: the original one (compatible with SunOS): - * - * Set up a signal frame... Make the stack look the way SunOS - * expects it to look which is basically: - * - * ---------------------------------- <-- %sp at signal time - * Struct sigcontext - * Signal address - * Ptr to sigcontext area above - * Signal code - * The signal number itself - * One register window - * ---------------------------------- <-- New %sp - */ -struct signal_sframe { - struct reg_window sig_window; - int sig_num; - int sig_code; - struct sigcontext __user *sig_scptr; - int sig_address; - struct sigcontext sig_context; - unsigned int extramask[_NSIG_WORDS - 1]; -}; - -/* - * And the new one, intended to be used for Linux applications only - * (we have enough in there to work with clone). - * All the interesting bits are in the info field. - */ - -struct new_signal_frame { +struct signal_frame { struct sparc_stackf ss; __siginfo_t info; __siginfo_fpu_t __user *fpu_save; @@ -85,8 +54,7 @@ struct rt_signal_frame { }; /* Align macros */ -#define SF_ALIGNEDSZ (((sizeof(struct signal_sframe) + 7) & (~7))) -#define NF_ALIGNEDSZ (((sizeof(struct new_signal_frame) + 7) & (~7))) +#define SF_ALIGNEDSZ (((sizeof(struct signal_frame) + 7) & (~7))) #define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame) + 7) & (~7))) static int _sigpause_common(old_sigset_t set) @@ -141,15 +109,20 @@ restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) return err; } -static inline void do_new_sigreturn (struct pt_regs *regs) +asmlinkage void do_sigreturn(struct pt_regs *regs) { - struct new_signal_frame __user *sf; + struct signal_frame __user *sf; unsigned long up_psr, pc, npc; sigset_t set; __siginfo_fpu_t __user *fpu_save; int err; - sf = (struct new_signal_frame __user *) regs->u_regs[UREG_FP]; + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + synchronize_user_stack(); + + sf = (struct signal_frame __user *) regs->u_regs[UREG_FP]; /* 1. Make sure we are not getting garbage from the user */ if (!access_ok(VERIFY_READ, sf, sizeof(*sf))) @@ -198,73 +171,6 @@ segv_and_exit: force_sig(SIGSEGV, current); } -asmlinkage void do_sigreturn(struct pt_regs *regs) -{ - struct sigcontext __user *scptr; - unsigned long pc, npc, psr; - sigset_t set; - int err; - - /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; - - synchronize_user_stack(); - - if (current->thread.new_signal) { - do_new_sigreturn(regs); - return; - } - - scptr = (struct sigcontext __user *) regs->u_regs[UREG_I0]; - - /* Check sanity of the user arg. */ - if (!access_ok(VERIFY_READ, scptr, sizeof(struct sigcontext)) || - (((unsigned long) scptr) & 3)) - goto segv_and_exit; - - err = __get_user(pc, &scptr->sigc_pc); - err |= __get_user(npc, &scptr->sigc_npc); - - if ((pc | npc) & 3) - goto segv_and_exit; - - /* This is pretty much atomic, no amount locking would prevent - * the races which exist anyways. - */ - err |= __get_user(set.sig[0], &scptr->sigc_mask); - /* Note that scptr + 1 points to extramask */ - err |= __copy_from_user(&set.sig[1], scptr + 1, - (_NSIG_WORDS - 1) * sizeof(unsigned int)); - - if (err) - goto segv_and_exit; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - regs->pc = pc; - regs->npc = npc; - - err = __get_user(regs->u_regs[UREG_FP], &scptr->sigc_sp); - err |= __get_user(regs->u_regs[UREG_I0], &scptr->sigc_o0); - err |= __get_user(regs->u_regs[UREG_G1], &scptr->sigc_g1); - - /* User can only change condition codes in %psr. */ - err |= __get_user(psr, &scptr->sigc_psr); - if (err) - goto segv_and_exit; - - regs->psr &= ~(PSR_ICC); - regs->psr |= (psr & PSR_ICC); - return; - -segv_and_exit: - force_sig(SIGSEGV, current); -} - asmlinkage void do_rt_sigreturn(struct pt_regs *regs) { struct rt_signal_frame __user *sf; @@ -351,128 +257,6 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re return (void __user *)(sp - framesize); } -static inline void -setup_frame(struct sigaction *sa, struct pt_regs *regs, int signr, sigset_t *oldset, siginfo_t *info) -{ - struct signal_sframe __user *sframep; - struct sigcontext __user *sc; - int window = 0, err; - unsigned long pc = regs->pc; - unsigned long npc = regs->npc; - struct thread_info *tp = current_thread_info(); - void __user *sig_address; - int sig_code; - - synchronize_user_stack(); - sframep = (struct signal_sframe __user *) - get_sigframe(sa, regs, SF_ALIGNEDSZ); - if (invalid_frame_pointer(sframep, sizeof(*sframep))){ - /* Don't change signal code and address, so that - * post mortem debuggers can have a look. - */ - goto sigill_and_return; - } - - sc = &sframep->sig_context; - - /* We've already made sure frame pointer isn't in kernel space... */ - err = __put_user((sas_ss_flags(regs->u_regs[UREG_FP]) == SS_ONSTACK), - &sc->sigc_onstack); - err |= __put_user(oldset->sig[0], &sc->sigc_mask); - err |= __copy_to_user(sframep->extramask, &oldset->sig[1], - (_NSIG_WORDS - 1) * sizeof(unsigned int)); - err |= __put_user(regs->u_regs[UREG_FP], &sc->sigc_sp); - err |= __put_user(pc, &sc->sigc_pc); - err |= __put_user(npc, &sc->sigc_npc); - err |= __put_user(regs->psr, &sc->sigc_psr); - err |= __put_user(regs->u_regs[UREG_G1], &sc->sigc_g1); - err |= __put_user(regs->u_regs[UREG_I0], &sc->sigc_o0); - err |= __put_user(tp->w_saved, &sc->sigc_oswins); - if (tp->w_saved) - for (window = 0; window < tp->w_saved; window++) { - put_user((char *)tp->rwbuf_stkptrs[window], - &sc->sigc_spbuf[window]); - err |= __copy_to_user(&sc->sigc_wbuf[window], - &tp->reg_window[window], - sizeof(struct reg_window)); - } - else - err |= __copy_to_user(sframep, (char *) regs->u_regs[UREG_FP], - sizeof(struct reg_window)); - - tp->w_saved = 0; /* So process is allowed to execute. */ - - err |= __put_user(signr, &sframep->sig_num); - sig_address = NULL; - sig_code = 0; - if (SI_FROMKERNEL (info) && (info->si_code & __SI_MASK) == __SI_FAULT) { - sig_address = info->si_addr; - switch (signr) { - case SIGSEGV: - switch (info->si_code) { - case SEGV_MAPERR: sig_code = SUBSIG_NOMAPPING; break; - default: sig_code = SUBSIG_PROTECTION; break; - } - break; - case SIGILL: - switch (info->si_code) { - case ILL_ILLOPC: sig_code = SUBSIG_ILLINST; break; - case ILL_PRVOPC: sig_code = SUBSIG_PRIVINST; break; - case ILL_ILLTRP: sig_code = SUBSIG_BADTRAP(info->si_trapno); break; - default: sig_code = SUBSIG_STACK; break; - } - break; - case SIGFPE: - switch (info->si_code) { - case FPE_INTDIV: sig_code = SUBSIG_IDIVZERO; break; - case FPE_INTOVF: sig_code = SUBSIG_FPINTOVFL; break; - case FPE_FLTDIV: sig_code = SUBSIG_FPDIVZERO; break; - case FPE_FLTOVF: sig_code = SUBSIG_FPOVFLOW; break; - case FPE_FLTUND: sig_code = SUBSIG_FPUNFLOW; break; - case FPE_FLTRES: sig_code = SUBSIG_FPINEXACT; break; - case FPE_FLTINV: sig_code = SUBSIG_FPOPERROR; break; - default: sig_code = SUBSIG_FPERROR; break; - } - break; - case SIGBUS: - switch (info->si_code) { - case BUS_ADRALN: sig_code = SUBSIG_ALIGNMENT; break; - case BUS_ADRERR: sig_code = SUBSIG_MISCERROR; break; - default: sig_code = SUBSIG_BUSTIMEOUT; break; - } - break; - case SIGEMT: - switch (info->si_code) { - case EMT_TAGOVF: sig_code = SUBSIG_TAG; break; - } - break; - case SIGSYS: - if (info->si_code == (__SI_FAULT|0x100)) { - sig_code = info->si_trapno; - break; - } - default: - sig_address = NULL; - } - } - err |= __put_user((unsigned long)sig_address, &sframep->sig_address); - err |= __put_user(sig_code, &sframep->sig_code); - err |= __put_user(sc, &sframep->sig_scptr); - if (err) - goto sigsegv; - - regs->u_regs[UREG_FP] = (unsigned long) sframep; - regs->pc = (unsigned long) sa->sa_handler; - regs->npc = (regs->pc + 4); - return; - -sigill_and_return: - do_exit(SIGILL); -sigsegv: - force_sigsegv(signr, current); -} - - static inline int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) { @@ -508,21 +292,20 @@ save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) return err; } -static inline void -new_setup_frame(struct k_sigaction *ka, struct pt_regs *regs, - int signo, sigset_t *oldset) +static void setup_frame(struct k_sigaction *ka, struct pt_regs *regs, + int signo, sigset_t *oldset) { - struct new_signal_frame __user *sf; + struct signal_frame __user *sf; int sigframe_size, err; /* 1. Make sure everything is clean */ synchronize_user_stack(); - sigframe_size = NF_ALIGNEDSZ; + sigframe_size = SF_ALIGNEDSZ; if (!used_math()) sigframe_size -= sizeof(__siginfo_fpu_t); - sf = (struct new_signal_frame __user *) + sf = (struct signal_frame __user *) get_sigframe(&ka->sa, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) @@ -586,9 +369,8 @@ sigsegv: force_sigsegv(signo, current); } -static inline void -new_setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, - int signo, sigset_t *oldset, siginfo_t *info) +static void setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, + int signo, sigset_t *oldset, siginfo_t *info) { struct rt_signal_frame __user *sf; int sigframe_size; @@ -674,11 +456,9 @@ handle_signal(unsigned long signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { if (ka->sa.sa_flags & SA_SIGINFO) - new_setup_rt_frame(ka, regs, signr, oldset, info); - else if (current->thread.new_signal) - new_setup_frame(ka, regs, signr, oldset); + setup_rt_frame(ka, regs, signr, oldset, info); else - setup_frame(&ka->sa, regs, signr, oldset, info); + setup_frame(ka, regs, signr, oldset); spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c index 42bf09db9a8..f188b5dc9fd 100644 --- a/arch/sparc/kernel/sys_sparc.c +++ b/arch/sparc/kernel/sys_sparc.c @@ -1,5 +1,4 @@ -/* $Id: sys_sparc.c,v 1.70 2001/04/14 01:12:02 davem Exp $ - * linux/arch/sparc/kernel/sys_sparc.c +/* linux/arch/sparc/kernel/sys_sparc.c * * This file contains various random system calls that * have a non-standard calling sequence on the Linux/sparc @@ -395,10 +394,8 @@ sparc_sigaction (int sig, const struct old_sigaction __user *act, struct k_sigaction new_ka, old_ka; int ret; - if (sig < 0) { - current->thread.new_signal = 1; - sig = -sig; - } + WARN_ON_ONCE(sig >= 0); + sig = -sig; if (act) { unsigned long mask; @@ -446,11 +443,6 @@ sys_rt_sigaction(int sig, if (sigsetsize != sizeof(sigset_t)) return -EINVAL; - /* All tasks which use RT signals (effectively) use - * new style signals. - */ - current->thread.new_signal = 1; - if (act) { new_ka.ka_restorer = restorer; if (copy_from_user(&new_ka.sa, act, sizeof(*act))) diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 8acc5cc3862..edbe71e3fab 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -1,9 +1,5 @@ -# $Id: config.in,v 1.158 2002/01/24 22:14:44 davem Exp $ -# For a description of the syntax of this configuration file, -# see the Configure script. -# - -mainmenu "Linux/UltraSPARC Kernel Configuration" +# sparc64 configuration +mainmenu "Linux Kernel Configuration for 64-bit SPARC" config SPARC bool @@ -17,12 +13,6 @@ config SPARC64 default y select HAVE_IDE select HAVE_LMB - help - SPARC is a family of RISC microprocessors designed and marketed by - Sun Microsystems, incorporated. This port covers the newer 64-bit - UltraSPARC. The UltraLinux project maintains both the SPARC32 and - SPARC64 ports; its web page is available at - <http://www.ultralinux.org/>. config GENERIC_TIME bool @@ -97,7 +87,7 @@ config SPARC64_PAGE_SIZE_8KB help This lets you select the page size of the kernel. - 8KB and 64KB work quite well, since Sparc ELF sections + 8KB and 64KB work quite well, since SPARC ELF sections provide for up to 64KB alignment. Therefore, 512KB and 4MB are for expert hackers only. @@ -138,7 +128,7 @@ config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SMP select HOTPLUG - ---help--- + help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. @@ -155,23 +145,16 @@ source "kernel/time/Kconfig" config SMP bool "Symmetric multi-processing support" - ---help--- + help This enables support for systems with more than one CPU. If you have a system with only one CPU, say N. If you have a system with more than one CPU, say Y. If you say N here, the kernel will run on single and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If - you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel - will run faster if you say N here. - - People using multiprocessor machines who say Y here should also say - Y to "Enhanced Real Time Clock Support", below. The "Advanced Power - Management" code will be disabled if you say Y here. - - See also <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO - available at <http://www.tldp.org/docs.html#howto>. + you say Y here, the kernel will run on single-processor machines. + On a single-processor machine, the kernel will run faster if you say + N here. If you don't know what to do here, say N. @@ -284,50 +267,19 @@ source "mm/Kconfig" config ISA bool - help - Find out whether you have ISA slots on your motherboard. ISA is the - name of a bus system, i.e. the way the CPU talks to the other stuff - inside your box. Other bus systems are PCI, EISA, MicroChannel - (MCA) or VESA. ISA is an older system, now being displaced by PCI; - newer boards don't support it. If you have ISA, say Y, otherwise N. config ISAPNP bool - help - Say Y here if you would like support for ISA Plug and Play devices. - Some information is in <file:Documentation/isapnp.txt>. - - To compile this driver as a module, choose M here: the - module will be called isapnp. - - If unsure, say Y. config EISA bool - ---help--- - The Extended Industry Standard Architecture (EISA) bus was - developed as an open alternative to the IBM MicroChannel bus. - - The EISA bus provided some of the features of the IBM MicroChannel - bus while maintaining backward compatibility with cards made for - the older ISA bus. The EISA bus saw limited use between 1988 and - 1995 when it was made obsolete by the PCI bus. - - Say Y here if you are building a kernel for an EISA-based machine. - - Otherwise, say N. config MCA bool - help - MicroChannel Architecture is found in some IBM PS/2 machines and - laptops. It is a bus system similar to PCI or ISA. See - <file:Documentation/mca.txt> (and especially the web page given - there) before attempting to build an MCA bus kernel. config PCMCIA tristate - ---help--- + help Say Y here if you want to attach PCMCIA- or PC-cards to your Linux computer. These are credit-card size devices such as network cards, modems or hard drives often used with laptops computers. There are @@ -369,10 +321,10 @@ config PCI bool "PCI support" select ARCH_SUPPORTS_MSI help - Find out whether you have a PCI motherboard. PCI is the name of a - bus system, i.e. the way the CPU talks to the other stuff inside - your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or - VESA. If you have PCI, say Y, otherwise N. + Find out whether your system includes a PCI bus. PCI is the name of + a bus system, i.e. the way the CPU talks to the other stuff inside + your box. If you say Y here, the kernel will include drivers and + infrastructure code to support PCI bus devices. config PCI_DOMAINS def_bool PCI @@ -396,15 +348,8 @@ menu "Executable file formats" source "fs/Kconfig.binfmt" -config SPARC32_COMPAT - bool "Kernel support for Linux/Sparc 32bit binary compatibility" - help - This allows you to run 32-bit binaries on your Ultra. - Everybody wants this; say Y. - config COMPAT bool - depends on SPARC32_COMPAT default y select COMPAT_BINFMT_ELF @@ -421,8 +366,8 @@ config SCHED_SMT default y help SMT scheduler support improves the CPU scheduler's decision making - when dealing with UltraSPARC cpus at a cost of slightly increased - overhead in some places. If unsure say N here. + when dealing with SPARC cpus at a cost of slightly increased overhead + in some places. If unsure say N here. config SCHED_MC bool "Multi-core scheduler support" diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index 92f79680f70..aff93c9d13f 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.25-numa -# Wed Apr 23 04:49:08 2008 +# Linux kernel version: 2.6.25 +# Sat Apr 26 03:11:06 2008 # CONFIG_SPARC=y CONFIG_SPARC64=y @@ -152,7 +152,9 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_HUGETLB_PAGE_SIZE_4MB=y # CONFIG_HUGETLB_PAGE_SIZE_512K is not set # CONFIG_HUGETLB_PAGE_SIZE_64K is not set -# CONFIG_NUMA is not set +CONFIG_NUMA=y +CONFIG_NODES_SHIFT=4 +CONFIG_NODES_SPAN_OTHER_NODES=y CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_SPARSEMEM_ENABLE=y @@ -162,12 +164,14 @@ CONFIG_SELECT_MEMORY_MODEL=y # CONFIG_DISCONTIGMEM_MANUAL is not set CONFIG_SPARSEMEM_MANUAL=y CONFIG_SPARSEMEM=y +CONFIG_NEED_MULTIPLE_NODES=y CONFIG_HAVE_MEMORY_PRESENT=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPARSEMEM_EXTREME=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_MIGRATION=y CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=0 CONFIG_NR_QUICK=1 @@ -191,7 +195,6 @@ CONFIG_SUN_OPENPROMFS=m CONFIG_BINFMT_ELF=y CONFIG_COMPAT_BINFMT_ELF=y CONFIG_BINFMT_MISC=m -CONFIG_SPARC32_COMPAT=y CONFIG_COMPAT=y CONFIG_SYSVIPC_COMPAT=y CONFIG_SCHED_SMT=y @@ -746,13 +749,7 @@ CONFIG_DEVPORT=y CONFIG_I2C=y CONFIG_I2C_BOARDINFO=y # CONFIG_I2C_CHARDEV is not set - -# -# I2C Algorithms -# CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_ALGOPCF is not set -# CONFIG_I2C_ALGOPCA is not set # # I2C Hardware Bus support @@ -780,6 +777,7 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_PLATFORM is not set # # Miscellaneous I2C Chip support @@ -1026,6 +1024,7 @@ CONFIG_SND_ALI5451=m # CONFIG_SND_AU8810 is not set # CONFIG_SND_AU8820 is not set # CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set # CONFIG_SND_AZT3328 is not set # CONFIG_SND_BT87X is not set # CONFIG_SND_CA0106 is not set @@ -1097,10 +1096,6 @@ CONFIG_SND_SUN_CS4231=m # CONFIG_SND_SOC is not set # -# SoC Audio support for SuperH -# - -# # ALSA SoC audio for Freescale SOCs # diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index 63c6ae0dd27..2bd0340b743 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -15,17 +15,17 @@ obj-y := process.o setup.o cpu.o idprom.o \ visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o obj-$(CONFIG_STACKTRACE) += stacktrace.o -obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o \ +obj-$(CONFIG_PCI) += ebus.o pci_common.o \ pci_psycho.o pci_sabre.o pci_schizo.o \ pci_sun4v.o pci_sun4v_asm.o pci_fire.o obj-$(CONFIG_PCI_MSI) += pci_msi.o obj-$(CONFIG_SMP) += smp.o trampoline.o hvtramp.o -obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o +obj-$(CONFIG_COMPAT) += sys32.o sys_sparc32.o signal32.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o obj-$(CONFIG_AUDIT) += audit.o -obj-$(CONFIG_AUDIT)$(CONFIG_SPARC32_COMPAT) += compat_audit.o +obj-$(CONFIG_AUDIT)$(CONFIG_COMPAT) += compat_audit.o obj-y += $(obj-yy) diff --git a/arch/sparc64/kernel/audit.c b/arch/sparc64/kernel/audit.c index 24d7f4b4178..8fff0ac63d5 100644 --- a/arch/sparc64/kernel/audit.c +++ b/arch/sparc64/kernel/audit.c @@ -30,7 +30,7 @@ static unsigned signal_class[] = { int audit_classify_arch(int arch) { -#ifdef CONFIG_SPARC32_COMPAT +#ifdef CONFIG_COMPAT if (arch == AUDIT_ARCH_SPARC) return 1; #endif @@ -39,7 +39,7 @@ int audit_classify_arch(int arch) int audit_classify_syscall(int abi, unsigned syscall) { -#ifdef CONFIG_SPARC32_COMPAT +#ifdef CONFIG_COMPAT extern int sparc32_classify_syscall(unsigned); if (abi == AUDIT_ARCH_SPARC) return sparc32_classify_syscall(syscall); @@ -60,7 +60,7 @@ int audit_classify_syscall(int abi, unsigned syscall) static int __init audit_classes_init(void) { -#ifdef CONFIG_SPARC32_COMPAT +#ifdef CONFIG_COMPAT extern __u32 sparc32_dir_class[]; extern __u32 sparc32_write_class[]; extern __u32 sparc32_read_class[]; diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index eb88bd6e674..b441a26b73b 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -1,6 +1,6 @@ /* irq.c: UltraSparc IRQ handling/init/registry. * - * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net) + * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net) * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) */ @@ -308,6 +308,7 @@ static void sun4u_irq_enable(unsigned int virt_irq) IMAP_AID_SAFARI | IMAP_NID_SAFARI); val |= tid | IMAP_VALID; upa_writeq(val, imap); + upa_writeq(ICLR_IDLE, data->iclr); } } diff --git a/arch/sparc64/kernel/isa.c b/arch/sparc64/kernel/isa.c deleted file mode 100644 index a2af5ed784c..00000000000 --- a/arch/sparc64/kernel/isa.c +++ /dev/null @@ -1,191 +0,0 @@ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/pci.h> -#include <linux/slab.h> -#include <asm/oplib.h> -#include <asm/prom.h> -#include <asm/of_device.h> -#include <asm/isa.h> - -struct sparc_isa_bridge *isa_chain; - -static void __init fatal_err(const char *reason) -{ - prom_printf("ISA: fatal error, %s.\n", reason); -} - -static void __init report_dev(struct sparc_isa_device *isa_dev, int child) -{ - if (child) - printk(" (%s)", isa_dev->prom_node->name); - else - printk(" [%s", isa_dev->prom_node->name); -} - -static void __init isa_dev_get_resource(struct sparc_isa_device *isa_dev) -{ - struct of_device *op = of_find_device_by_node(isa_dev->prom_node); - - memcpy(&isa_dev->resource, &op->resource[0], sizeof(struct resource)); -} - -static void __init isa_dev_get_irq(struct sparc_isa_device *isa_dev) -{ - struct of_device *op = of_find_device_by_node(isa_dev->prom_node); - - if (!op || !op->num_irqs) { - isa_dev->irq = PCI_IRQ_NONE; - } else { - isa_dev->irq = op->irqs[0]; - } -} - -static void __init isa_fill_children(struct sparc_isa_device *parent_isa_dev) -{ - struct device_node *dp = parent_isa_dev->prom_node->child; - - if (!dp) - return; - - printk(" ->"); - while (dp) { - struct sparc_isa_device *isa_dev; - - isa_dev = kzalloc(sizeof(*isa_dev), GFP_KERNEL); - if (!isa_dev) { - fatal_err("cannot allocate child isa_dev"); - prom_halt(); - } - - /* Link it in to parent. */ - isa_dev->next = parent_isa_dev->child; - parent_isa_dev->child = isa_dev; - - isa_dev->bus = parent_isa_dev->bus; - isa_dev->prom_node = dp; - - isa_dev_get_resource(isa_dev); - isa_dev_get_irq(isa_dev); - - report_dev(isa_dev, 1); - - dp = dp->sibling; - } -} - -static void __init isa_fill_devices(struct sparc_isa_bridge *isa_br) -{ - struct device_node *dp = isa_br->prom_node->child; - - while (dp) { - struct sparc_isa_device *isa_dev; - struct dev_archdata *sd; - - isa_dev = kzalloc(sizeof(*isa_dev), GFP_KERNEL); - if (!isa_dev) { - printk(KERN_DEBUG "ISA: cannot allocate isa_dev"); - return; - } - - sd = &isa_dev->ofdev.dev.archdata; - sd->prom_node = dp; - sd->op = &isa_dev->ofdev; - sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu; - sd->stc = isa_br->ofdev.dev.parent->archdata.stc; - sd->numa_node = isa_br->ofdev.dev.parent->archdata.numa_node; - - isa_dev->ofdev.node = dp; - isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev; - isa_dev->ofdev.dev.bus = &isa_bus_type; - sprintf(isa_dev->ofdev.dev.bus_id, "isa[%08x]", dp->node); - - /* Register with core */ - if (of_device_register(&isa_dev->ofdev) != 0) { - printk(KERN_DEBUG "isa: device registration error for %s!\n", - dp->path_component_name); - kfree(isa_dev); - goto next_sibling; - } - - /* Link it in. */ - isa_dev->next = NULL; - if (isa_br->devices == NULL) { - isa_br->devices = isa_dev; - } else { - struct sparc_isa_device *tmp = isa_br->devices; - - while (tmp->next) - tmp = tmp->next; - - tmp->next = isa_dev; - } - - isa_dev->bus = isa_br; - isa_dev->prom_node = dp; - - isa_dev_get_resource(isa_dev); - isa_dev_get_irq(isa_dev); - - report_dev(isa_dev, 0); - - isa_fill_children(isa_dev); - - printk("]"); - - next_sibling: - dp = dp->sibling; - } -} - -void __init isa_init(void) -{ - struct pci_dev *pdev; - unsigned short vendor, device; - int index = 0; - - vendor = PCI_VENDOR_ID_AL; - device = PCI_DEVICE_ID_AL_M1533; - - pdev = NULL; - while ((pdev = pci_get_device(vendor, device, pdev)) != NULL) { - struct sparc_isa_bridge *isa_br; - struct device_node *dp; - - dp = pci_device_to_OF_node(pdev); - - isa_br = kzalloc(sizeof(*isa_br), GFP_KERNEL); - if (!isa_br) { - printk(KERN_DEBUG "isa: cannot allocate sparc_isa_bridge"); - pci_dev_put(pdev); - return; - } - - isa_br->ofdev.node = dp; - isa_br->ofdev.dev.parent = &pdev->dev; - isa_br->ofdev.dev.bus = &isa_bus_type; - sprintf(isa_br->ofdev.dev.bus_id, "isa%d", index); - - /* Register with core */ - if (of_device_register(&isa_br->ofdev) != 0) { - printk(KERN_DEBUG "isa: device registration error for %s!\n", - dp->path_component_name); - kfree(isa_br); - pci_dev_put(pdev); - return; - } - - /* Link it in. */ - isa_br->next = isa_chain; - isa_chain = isa_br; - - isa_br->self = pdev; - isa_br->index = index++; - isa_br->prom_node = dp; - - printk("isa%d:", isa_br->index); - - isa_fill_devices(isa_br); - - printk("\n"); - } -} diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c index 9e58e8cba1c..d569f60c24b 100644 --- a/arch/sparc64/kernel/of_device.c +++ b/arch/sparc64/kernel/of_device.c @@ -412,12 +412,6 @@ static int __init build_one_resource(struct device_node *parent, static int __init use_1to1_mapping(struct device_node *pp) { - /* If this is on the PMU bus, don't try to translate it even - * if a ranges property exists. - */ - if (!strcmp(pp->name, "pmu")) - return 1; - /* If we have a ranges property in the parent, use it. */ if (of_find_property(pp, "ranges", NULL) != NULL) return 0; diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index 49f91276651..dbf2fc2f4d8 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -23,7 +23,6 @@ #include <asm/pgtable.h> #include <asm/irq.h> #include <asm/ebus.h> -#include <asm/isa.h> #include <asm/prom.h> #include <asm/apb.h> @@ -885,7 +884,6 @@ static int __init pcibios_init(void) pci_scan_each_controller_bus(); - isa_init(); ebus_init(); power_init(); diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index acf8c5250aa..05601374915 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -1,5 +1,4 @@ -/* $Id: process.c,v 1.131 2002/02/09 19:49:30 davem Exp $ - * arch/sparc64/kernel/process.c +/* arch/sparc64/kernel/process.c * * Copyright (C) 1995, 1996 David S. Miller (davem@caip.rutgers.edu) * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) @@ -368,9 +367,6 @@ void flush_thread(void) if (get_thread_current_ds() != ASI_AIUS) set_fs(USER_DS); - - /* Init new signal delivery disposition. */ - clear_thread_flag(TIF_NEWSIGNALS); } /* It's a bit more tricky when 64-bit tasks are involved... */ @@ -595,6 +591,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, if (clone_flags & CLONE_SETTLS) t->kregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3]; + /* We do not want to accidently trigger system call restart + * handling in the new thread. Therefore, clear out the trap + * type, which will make pt_regs_regs_is_syscall() return false. + */ + pt_regs_clear_trap_type(t->kregs); + return 0; } diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c index 77a3e8592cb..f2d88d8f7a4 100644 --- a/arch/sparc64/kernel/signal.c +++ b/arch/sparc64/kernel/signal.c @@ -8,7 +8,7 @@ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ -#ifdef CONFIG_SPARC32_COMPAT +#ifdef CONFIG_COMPAT #include <linux/compat.h> /* for compat_old_sigset_t */ #endif #include <linux/sched.h> @@ -236,9 +236,6 @@ struct rt_signal_frame { __siginfo_fpu_t fpu_state; }; -/* Align macros */ -#define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame) + 7) & (~7))) - static long _sigpause_common(old_sigset_t set) { set &= _BLOCKABLE; @@ -400,7 +397,7 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, synchronize_user_stack(); save_and_clear_fpu(); - sigframe_size = RT_ALIGNEDSZ; + sigframe_size = sizeof(struct rt_signal_frame); if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) sigframe_size -= sizeof(__siginfo_fpu_t); @@ -516,11 +513,10 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) struct k_sigaction ka; sigset_t *oldset; siginfo_t info; - int signr, tt; + int signr; - tt = regs->magic & 0x1ff; - if (tt == 0x110 || tt == 0x111 || tt == 0x16d) { - regs->magic &= ~0x1ff; + if (pt_regs_is_syscall(regs)) { + pt_regs_clear_trap_type(regs); cookie.restart_syscall = 1; } else cookie.restart_syscall = 0; @@ -531,7 +527,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) else oldset = ¤t->blocked; -#ifdef CONFIG_SPARC32_COMPAT +#ifdef CONFIG_COMPAT if (test_thread_flag(TIF_32BIT)) { extern void do_signal32(sigset_t *, struct pt_regs *, struct signal_deliver_cookie *); diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index 43cdec64d9c..91f8d0826db 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -1,5 +1,4 @@ -/* $Id: signal32.c,v 1.74 2002/02/09 19:49:30 davem Exp $ - * arch/sparc64/kernel/signal32.c +/* arch/sparc64/kernel/signal32.c * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -31,30 +30,6 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -/* Signal frames: the original one (compatible with SunOS): - * - * Set up a signal frame... Make the stack look the way SunOS - * expects it to look which is basically: - * - * ---------------------------------- <-- %sp at signal time - * Struct sigcontext - * Signal address - * Ptr to sigcontext area above - * Signal code - * The signal number itself - * One register window - * ---------------------------------- <-- New %sp - */ -struct signal_sframe32 { - struct reg_window32 sig_window; - int sig_num; - int sig_code; - /* struct sigcontext32 * */ u32 sig_scptr; - int sig_address; - struct sigcontext32 sig_context; - unsigned int extramask[_COMPAT_NSIG_WORDS - 1]; -}; - /* This magic should be in g_upper[0] for all upper parts * to be valid. */ @@ -65,12 +40,7 @@ typedef struct { unsigned int asi; } siginfo_extra_v8plus_t; -/* - * And the new one, intended to be used for Linux applications only - * (we have enough in there to work with clone). - * All the interesting bits are in the info field. - */ -struct new_signal_frame32 { +struct signal_frame32 { struct sparc_stackf32 ss; __siginfo32_t info; /* __siginfo_fpu32_t * */ u32 fpu_save; @@ -149,8 +119,7 @@ struct rt_signal_frame32 { }; /* Align macros */ -#define SF_ALIGNEDSZ (((sizeof(struct signal_sframe32) + 7) & (~7))) -#define NF_ALIGNEDSZ (((sizeof(struct new_signal_frame32) + 7) & (~7))) +#define SF_ALIGNEDSZ (((sizeof(struct signal_frame32) + 7) & (~7))) #define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame32) + 7) & (~7))) int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) @@ -241,17 +210,22 @@ static int restore_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu return err; } -void do_new_sigreturn32(struct pt_regs *regs) +void do_sigreturn32(struct pt_regs *regs) { - struct new_signal_frame32 __user *sf; + struct signal_frame32 __user *sf; unsigned int psr; unsigned pc, npc, fpu_save; sigset_t set; unsigned seta[_COMPAT_NSIG_WORDS]; int err, i; + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + synchronize_user_stack(); + regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL; - sf = (struct new_signal_frame32 __user *) regs->u_regs[UREG_FP]; + sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP]; /* 1. Make sure we are not getting garbage from the user */ if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) || @@ -319,76 +293,6 @@ segv: force_sig(SIGSEGV, current); } -asmlinkage void do_sigreturn32(struct pt_regs *regs) -{ - struct sigcontext32 __user *scptr; - unsigned int pc, npc, psr; - sigset_t set; - unsigned int seta[_COMPAT_NSIG_WORDS]; - int err; - - /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; - - synchronize_user_stack(); - if (test_thread_flag(TIF_NEWSIGNALS)) { - do_new_sigreturn32(regs); - return; - } - - scptr = (struct sigcontext32 __user *) - (regs->u_regs[UREG_I0] & 0x00000000ffffffffUL); - /* Check sanity of the user arg. */ - if (!access_ok(VERIFY_READ, scptr, sizeof(struct sigcontext32)) || - (((unsigned long) scptr) & 3)) - goto segv; - - err = __get_user(pc, &scptr->sigc_pc); - err |= __get_user(npc, &scptr->sigc_npc); - - if ((pc | npc) & 3) - goto segv; /* Nice try. */ - - err |= __get_user(seta[0], &scptr->sigc_mask); - /* Note that scptr + 1 points to extramask */ - err |= copy_from_user(seta+1, scptr + 1, - (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int)); - if (err) - goto segv; - switch (_NSIG_WORDS) { - case 4: set.sig[3] = seta[6] + (((long)seta[7]) << 32); - case 3: set.sig[2] = seta[4] + (((long)seta[5]) << 32); - case 2: set.sig[1] = seta[2] + (((long)seta[3]) << 32); - case 1: set.sig[0] = seta[0] + (((long)seta[1]) << 32); - } - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - if (test_thread_flag(TIF_32BIT)) { - pc &= 0xffffffff; - npc &= 0xffffffff; - } - regs->tpc = pc; - regs->tnpc = npc; - err = __get_user(regs->u_regs[UREG_FP], &scptr->sigc_sp); - err |= __get_user(regs->u_regs[UREG_I0], &scptr->sigc_o0); - err |= __get_user(regs->u_regs[UREG_G1], &scptr->sigc_g1); - - /* User can only change condition codes in %tstate. */ - err |= __get_user(psr, &scptr->sigc_psr); - if (err) - goto segv; - regs->tstate &= ~(TSTATE_ICC|TSTATE_XCC); - regs->tstate |= psr_to_tstate_icc(psr); - return; - -segv: - force_sig(SIGSEGV, current); -} - asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) { struct rt_signal_frame32 __user *sf; @@ -504,145 +408,6 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns return (void __user *)(sp - framesize); } -static void -setup_frame32(struct sigaction *sa, struct pt_regs *regs, int signr, sigset_t *oldset, siginfo_t *info) -{ - struct signal_sframe32 __user *sframep; - struct sigcontext32 __user *sc; - unsigned int seta[_COMPAT_NSIG_WORDS]; - int err = 0; - void __user *sig_address; - int sig_code; - unsigned long pc = regs->tpc; - unsigned long npc = regs->tnpc; - unsigned int psr; - - if (test_thread_flag(TIF_32BIT)) { - pc &= 0xffffffff; - npc &= 0xffffffff; - } - - synchronize_user_stack(); - save_and_clear_fpu(); - - sframep = (struct signal_sframe32 __user *) - get_sigframe(sa, regs, SF_ALIGNEDSZ); - if (invalid_frame_pointer(sframep, sizeof(*sframep))){ - /* Don't change signal code and address, so that - * post mortem debuggers can have a look. - */ - do_exit(SIGILL); - } - - sc = &sframep->sig_context; - - /* We've already made sure frame pointer isn't in kernel space... */ - err = __put_user((sas_ss_flags(regs->u_regs[UREG_FP]) == SS_ONSTACK), - &sc->sigc_onstack); - - switch (_NSIG_WORDS) { - case 4: seta[7] = (oldset->sig[3] >> 32); - seta[6] = oldset->sig[3]; - case 3: seta[5] = (oldset->sig[2] >> 32); - seta[4] = oldset->sig[2]; - case 2: seta[3] = (oldset->sig[1] >> 32); - seta[2] = oldset->sig[1]; - case 1: seta[1] = (oldset->sig[0] >> 32); - seta[0] = oldset->sig[0]; - } - err |= __put_user(seta[0], &sc->sigc_mask); - err |= __copy_to_user(sframep->extramask, seta + 1, - (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int)); - err |= __put_user(regs->u_regs[UREG_FP], &sc->sigc_sp); - err |= __put_user(pc, &sc->sigc_pc); - err |= __put_user(npc, &sc->sigc_npc); - psr = tstate_to_psr(regs->tstate); - if (current_thread_info()->fpsaved[0] & FPRS_FEF) - psr |= PSR_EF; - err |= __put_user(psr, &sc->sigc_psr); - err |= __put_user(regs->u_regs[UREG_G1], &sc->sigc_g1); - err |= __put_user(regs->u_regs[UREG_I0], &sc->sigc_o0); - err |= __put_user(get_thread_wsaved(), &sc->sigc_oswins); - - err |= copy_in_user((u32 __user *)sframep, - (u32 __user *)(regs->u_regs[UREG_FP]), - sizeof(struct reg_window32)); - - set_thread_wsaved(0); /* So process is allowed to execute. */ - err |= __put_user(signr, &sframep->sig_num); - sig_address = NULL; - sig_code = 0; - if (SI_FROMKERNEL (info) && (info->si_code & __SI_MASK) == __SI_FAULT) { - sig_address = info->si_addr; - switch (signr) { - case SIGSEGV: - switch (info->si_code) { - case SEGV_MAPERR: sig_code = SUBSIG_NOMAPPING; break; - default: sig_code = SUBSIG_PROTECTION; break; - } - break; - case SIGILL: - switch (info->si_code) { - case ILL_ILLOPC: sig_code = SUBSIG_ILLINST; break; - case ILL_PRVOPC: sig_code = SUBSIG_PRIVINST; break; - case ILL_ILLTRP: sig_code = SUBSIG_BADTRAP(info->si_trapno); break; - default: sig_code = SUBSIG_STACK; break; - } - break; - case SIGFPE: - switch (info->si_code) { - case FPE_INTDIV: sig_code = SUBSIG_IDIVZERO; break; - case FPE_INTOVF: sig_code = SUBSIG_FPINTOVFL; break; - case FPE_FLTDIV: sig_code = SUBSIG_FPDIVZERO; break; - case FPE_FLTOVF: sig_code = SUBSIG_FPOVFLOW; break; - case FPE_FLTUND: sig_code = SUBSIG_FPUNFLOW; break; - case FPE_FLTRES: sig_code = SUBSIG_FPINEXACT; break; - case FPE_FLTINV: sig_code = SUBSIG_FPOPERROR; break; - default: sig_code = SUBSIG_FPERROR; break; - } - break; - case SIGBUS: - switch (info->si_code) { - case BUS_ADRALN: sig_code = SUBSIG_ALIGNMENT; break; - case BUS_ADRERR: sig_code = SUBSIG_MISCERROR; break; - default: sig_code = SUBSIG_BUSTIMEOUT; break; - } - break; - case SIGEMT: - switch (info->si_code) { - case EMT_TAGOVF: sig_code = SUBSIG_TAG; break; - } - break; - case SIGSYS: - if (info->si_code == (__SI_FAULT|0x100)) { - /* See sys_sunos32.c */ - sig_code = info->si_trapno; - break; - } - default: - sig_address = NULL; - } - } - err |= __put_user(ptr_to_compat(sig_address), &sframep->sig_address); - err |= __put_user(sig_code, &sframep->sig_code); - err |= __put_user(ptr_to_compat(sc), &sframep->sig_scptr); - if (err) - goto sigsegv; - - regs->u_regs[UREG_FP] = (unsigned long) sframep; - regs->tpc = (unsigned long) sa->sa_handler; - regs->tnpc = (regs->tpc + 4); - if (test_thread_flag(TIF_32BIT)) { - regs->tpc &= 0xffffffff; - regs->tnpc &= 0xffffffff; - } - return; - -sigsegv: - force_sigsegv(signr, current); -} - - static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) { unsigned long *fpregs = current_thread_info()->fpregs; @@ -663,10 +428,10 @@ static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) return err; } -static void new_setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, - int signo, sigset_t *oldset) +static void setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, + int signo, sigset_t *oldset) { - struct new_signal_frame32 __user *sf; + struct signal_frame32 __user *sf; int sigframe_size; u32 psr; int i, err; @@ -676,11 +441,11 @@ static void new_setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, synchronize_user_stack(); save_and_clear_fpu(); - sigframe_size = NF_ALIGNEDSZ; + sigframe_size = SF_ALIGNEDSZ; if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) sigframe_size -= sizeof(__siginfo_fpu_t); - sf = (struct new_signal_frame32 __user *) + sf = (struct signal_frame32 __user *) get_sigframe(&ka->sa, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) @@ -944,10 +709,9 @@ static inline void handle_signal32(unsigned long signr, struct k_sigaction *ka, { if (ka->sa.sa_flags & SA_SIGINFO) setup_rt_frame32(ka, regs, signr, oldset, info); - else if (test_thread_flag(TIF_NEWSIGNALS)) - new_setup_frame32(ka, regs, signr, oldset); else - setup_frame32(&ka->sa, regs, signr, oldset, info); + setup_frame32(ka, regs, signr, oldset); + spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NOMASK)) diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 524b8892094..409dd71f273 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -866,14 +866,21 @@ void smp_call_function_client(int irq, struct pt_regs *regs) void *info = call_data->info; clear_softint(1 << irq); + + irq_enter(); + + if (!call_data->wait) { + /* let initiator proceed after getting data */ + atomic_inc(&call_data->finished); + } + + func(info); + + irq_exit(); + if (call_data->wait) { /* let initiator proceed only after completion */ - func(info); atomic_inc(&call_data->finished); - } else { - /* let initiator proceed after getting data */ - atomic_inc(&call_data->finished); - func(info); } } @@ -1032,7 +1039,9 @@ void smp_receive_signal(int cpu) void smp_receive_signal_client(int irq, struct pt_regs *regs) { + irq_enter(); clear_softint(1 << irq); + irq_exit(); } void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) @@ -1040,6 +1049,8 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) struct mm_struct *mm; unsigned long flags; + irq_enter(); + clear_softint(1 << irq); /* See if we need to allocate a new TLB context because @@ -1059,6 +1070,8 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) load_secondary_context(mm); __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); + + irq_exit(); } void smp_new_mmu_context_version(void) @@ -1217,6 +1230,8 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) { clear_softint(1 << irq); + irq_enter(); + preempt_disable(); __asm__ __volatile__("flushw"); @@ -1229,6 +1244,8 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) prom_world(0); preempt_enable(); + + irq_exit(); } /* /proc/profile writes can call this, don't __init it please. */ diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 66336590e83..8ac0b99f2c5 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -49,7 +49,6 @@ #endif #ifdef CONFIG_PCI #include <asm/ebus.h> -#include <asm/isa.h> #endif #include <asm/ns87303.h> #include <asm/timer.h> @@ -187,7 +186,6 @@ EXPORT_SYMBOL(insw); EXPORT_SYMBOL(insl); #ifdef CONFIG_PCI EXPORT_SYMBOL(ebus_chain); -EXPORT_SYMBOL(isa_chain); EXPORT_SYMBOL(pci_alloc_consistent); EXPORT_SYMBOL(pci_free_consistent); EXPORT_SYMBOL(pci_map_single); diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c index 73ed01ba40d..8d4761f15fa 100644 --- a/arch/sparc64/kernel/sys_sparc.c +++ b/arch/sparc64/kernel/sys_sparc.c @@ -454,8 +454,8 @@ asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second, err = sys_semget(first, (int)second, (int)third); goto out; case SEMCTL: { - err = sys_semctl(first, third, - (int)second | IPC_64, + err = sys_semctl(first, second, + (int)third | IPC_64, (union semun) ptr); goto out; } diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index c1a61e98899..161ce4710fe 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -554,10 +554,8 @@ asmlinkage long compat_sys_sigaction(int sig, struct old_sigaction32 __user *act struct k_sigaction new_ka, old_ka; int ret; - if (sig < 0) { - set_thread_flag(TIF_NEWSIGNALS); - sig = -sig; - } + WARN_ON_ONCE(sig >= 0); + sig = -sig; if (act) { compat_old_sigset_t mask; @@ -601,11 +599,6 @@ asmlinkage long compat_sys_rt_sigaction(int sig, if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; - /* All tasks which use RT signals (effectively) use - * new style signals. - */ - set_thread_flag(TIF_NEWSIGNALS); - if (act) { u32 u_handler, u_restorer; diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 177d8aaeec4..8c2b50e8abc 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1699,9 +1699,21 @@ void __init paging_init(void) * functions like clear_dcache_dirty_cpu use the cpu mask * in 13-bit signed-immediate instruction fields. */ - BUILD_BUG_ON(FLAGS_RESERVED != 32); + + /* + * Page flags must not reach into upper 32 bits that are used + * for the cpu number + */ + BUILD_BUG_ON(NR_PAGEFLAGS > 32); + + /* + * The bit fields placed in the high range must not reach below + * the 32 bit boundary. Otherwise we cannot place the cpu field + * at the 32 bit boundary. + */ BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + - ilog2(roundup_pow_of_two(NR_CPUS)) > FLAGS_RESERVED); + ilog2(roundup_pow_of_two(NR_CPUS)) > 32); + BUILD_BUG_ON(NR_CPUS > 4096); kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64 index 3fbe69e359e..5696e7b374b 100644 --- a/arch/um/Kconfig.x86_64 +++ b/arch/um/Kconfig.x86_64 @@ -1,3 +1,10 @@ + +menu "Host processor type and features" + +source "arch/x86/Kconfig.cpu" + +endmenu + config UML_X86 bool default y diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index db3082b4da4..6e51424745a 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -125,7 +125,7 @@ static int open_one_chan(struct chan *chan) return 0; } -int open_chan(struct list_head *chans) +static int open_chan(struct list_head *chans) { struct list_head *ele; struct chan *chan; @@ -583,19 +583,6 @@ int parse_chan_pair(char *str, struct line *line, int device, return 0; } -int chan_out_fd(struct list_head *chans) -{ - struct list_head *ele; - struct chan *chan; - - list_for_each(ele, chans) { - chan = list_entry(ele, struct chan, list); - if (chan->primary && chan->output) - return chan->fd; - } - return -1; -} - void chan_interrupt(struct list_head *chans, struct delayed_work *task, struct tty_struct *tty, int irq) { diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 2c898c4d6b6..10b86e1cc65 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -304,7 +304,7 @@ int line_ioctl(struct tty_struct *tty, struct file * file, break; if (i == ARRAY_SIZE(tty_ioctls)) { printk(KERN_ERR "%s: %s: unknown ioctl: 0x%x\n", - __FUNCTION__, tty->name, cmd); + __func__, tty->name, cmd); } ret = -ENOIOCTLCMD; break; diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c index 822092f149b..8c4378a76d6 100644 --- a/arch/um/drivers/mcast_kern.c +++ b/arch/um/drivers/mcast_kern.c @@ -58,7 +58,7 @@ static const struct net_kern_info mcast_kern_info = { .write = mcast_write, }; -int mcast_setup(char *str, char **mac_out, void *data) +static int mcast_setup(char *str, char **mac_out, void *data) { struct mcast_init *init = data; char *port_str = NULL, *ttl_str = NULL, *remain; diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c index 13af2f03ed8..f8cf4c8bede 100644 --- a/arch/um/drivers/mconsole_user.c +++ b/arch/um/drivers/mconsole_user.c @@ -39,7 +39,7 @@ static struct mconsole_command commands[] = { /* Initialized in mconsole_init, which is an initcall */ char mconsole_socket_name[256]; -int mconsole_reply_v0(struct mc_request *req, char *reply) +static int mconsole_reply_v0(struct mc_request *req, char *reply) { struct iovec iov; struct msghdr msg; diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 1d43bdfc20c..5b4ca8d9368 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -116,7 +116,7 @@ static void uml_dev_close(struct work_struct *work) dev_close(lp->dev); } -irqreturn_t uml_net_interrupt(int irq, void *dev_id) +static irqreturn_t uml_net_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; struct uml_net_private *lp = dev->priv; @@ -296,7 +296,7 @@ static struct ethtool_ops uml_net_ethtool_ops = { .get_link = ethtool_op_get_link, }; -void uml_net_user_timer_expire(unsigned long _conn) +static void uml_net_user_timer_expire(unsigned long _conn) { #ifdef undef struct connection *conn = (struct connection *)_conn; @@ -786,7 +786,7 @@ static int uml_inetaddr_event(struct notifier_block *this, unsigned long event, } /* uml_net_init shouldn't be called twice on two CPUs at the same time */ -struct notifier_block uml_inetaddr_notifier = { +static struct notifier_block uml_inetaddr_notifier = { .notifier_call = uml_inetaddr_event, }; diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c index addd7590265..d269ca387f1 100644 --- a/arch/um/drivers/port_user.c +++ b/arch/um/drivers/port_user.c @@ -153,7 +153,7 @@ struct port_pre_exec_data { int pipe_fd; }; -void port_pre_exec(void *arg) +static void port_pre_exec(void *arg) { struct port_pre_exec_data *data = arg; diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c index 6b4a0f9e38d..d19faec7046 100644 --- a/arch/um/drivers/slip_kern.c +++ b/arch/um/drivers/slip_kern.c @@ -13,7 +13,7 @@ struct slip_init { char *gate_addr; }; -void slip_init(struct net_device *dev, void *data) +static void slip_init(struct net_device *dev, void *data) { struct uml_net_private *private; struct slip_data *spri; @@ -57,7 +57,7 @@ static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) (struct slip_data *) &lp->user); } -const struct net_kern_info slip_kern_info = { +static const struct net_kern_info slip_kern_info = { .init = slip_init, .protocol = slip_protocol, .read = slip_read, diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c index cec0c33cdd3..49266f6108c 100644 --- a/arch/um/drivers/stdio_console.c +++ b/arch/um/drivers/stdio_console.c @@ -34,7 +34,7 @@ static struct tty_driver *console_driver; -void stdio_announce(char *dev_name, int dev) +static void stdio_announce(char *dev_name, int dev) { printk(KERN_INFO "Virtual console %d assigned device '%s'\n", dev, dev_name); @@ -158,7 +158,7 @@ static struct console stdiocons = { .index = -1, }; -int stdio_init(void) +static int stdio_init(void) { char *new_title; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index be3a2797dac..5e45e39a8a8 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -72,18 +72,6 @@ struct io_thread_req { int error; }; -extern int open_ubd_file(char *file, struct openflags *openflags, int shared, - char **backing_file_out, int *bitmap_offset_out, - unsigned long *bitmap_len_out, int *data_offset_out, - int *create_cow_out); -extern int create_cow_file(char *cow_file, char *backing_file, - struct openflags flags, int sectorsize, - int alignment, int *bitmap_offset_out, - unsigned long *bitmap_len_out, - int *data_offset_out); -extern int read_cow_bitmap(int fd, void *buf, int offset, int len); -extern void do_io(struct io_thread_req *req); - static inline int ubd_test_bit(__u64 bit, unsigned char *data) { __u64 n; @@ -200,7 +188,7 @@ struct ubd { } /* Protected by ubd_lock */ -struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; +static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD }; /* Only changed by fake_ide_setup which is a setup */ static int fake_ide = 0; @@ -463,7 +451,7 @@ __uml_help(udb_setup, static void do_ubd_request(struct request_queue * q); /* Only changed by ubd_init, which is an initcall. */ -int thread_fd = -1; +static int thread_fd = -1; static void ubd_end_request(struct request *req, int bytes, int error) { @@ -531,7 +519,7 @@ static irqreturn_t ubd_intr(int irq, void *dev) /* Only changed by ubd_init, which is an initcall. */ static int io_pid = -1; -void kill_io_thread(void) +static void kill_io_thread(void) { if(io_pid != -1) os_kill_process(io_pid, 1); @@ -547,6 +535,192 @@ static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out) return os_file_size(file, size_out); } +static int read_cow_bitmap(int fd, void *buf, int offset, int len) +{ + int err; + + err = os_seek_file(fd, offset); + if (err < 0) + return err; + + err = os_read_file(fd, buf, len); + if (err < 0) + return err; + + return 0; +} + +static int backing_file_mismatch(char *file, __u64 size, time_t mtime) +{ + unsigned long modtime; + unsigned long long actual; + int err; + + err = os_file_modtime(file, &modtime); + if (err < 0) { + printk(KERN_ERR "Failed to get modification time of backing " + "file \"%s\", err = %d\n", file, -err); + return err; + } + + err = os_file_size(file, &actual); + if (err < 0) { + printk(KERN_ERR "Failed to get size of backing file \"%s\", " + "err = %d\n", file, -err); + return err; + } + + if (actual != size) { + /*__u64 can be a long on AMD64 and with %lu GCC complains; so + * the typecast.*/ + printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header " + "vs backing file\n", (unsigned long long) size, actual); + return -EINVAL; + } + if (modtime != mtime) { + printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs " + "backing file\n", mtime, modtime); + return -EINVAL; + } + return 0; +} + +static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow) +{ + struct uml_stat buf1, buf2; + int err; + + if (from_cmdline == NULL) + return 0; + if (!strcmp(from_cmdline, from_cow)) + return 0; + + err = os_stat_file(from_cmdline, &buf1); + if (err < 0) { + printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline, + -err); + return 0; + } + err = os_stat_file(from_cow, &buf2); + if (err < 0) { + printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow, + -err); + return 1; + } + if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) + return 0; + + printk(KERN_ERR "Backing file mismatch - \"%s\" requested, " + "\"%s\" specified in COW header of \"%s\"\n", + from_cmdline, from_cow, cow); + return 1; +} + +static int open_ubd_file(char *file, struct openflags *openflags, int shared, + char **backing_file_out, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out, + int *create_cow_out) +{ + time_t mtime; + unsigned long long size; + __u32 version, align; + char *backing_file; + int fd, err, sectorsize, asked_switch, mode = 0644; + + fd = os_open_file(file, *openflags, mode); + if (fd < 0) { + if ((fd == -ENOENT) && (create_cow_out != NULL)) + *create_cow_out = 1; + if (!openflags->w || + ((fd != -EROFS) && (fd != -EACCES))) + return fd; + openflags->w = 0; + fd = os_open_file(file, *openflags, mode); + if (fd < 0) + return fd; + } + + if (shared) + printk(KERN_INFO "Not locking \"%s\" on the host\n", file); + else { + err = os_lock_file(fd, openflags->w); + if (err < 0) { + printk(KERN_ERR "Failed to lock '%s', err = %d\n", + file, -err); + goto out_close; + } + } + + /* Successful return case! */ + if (backing_file_out == NULL) + return fd; + + err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, + &size, §orsize, &align, bitmap_offset_out); + if (err && (*backing_file_out != NULL)) { + printk(KERN_ERR "Failed to read COW header from COW file " + "\"%s\", errno = %d\n", file, -err); + goto out_close; + } + if (err) + return fd; + + asked_switch = path_requires_switch(*backing_file_out, backing_file, + file); + + /* Allow switching only if no mismatch. */ + if (asked_switch && !backing_file_mismatch(*backing_file_out, size, + mtime)) { + printk(KERN_ERR "Switching backing file to '%s'\n", + *backing_file_out); + err = write_cow_header(file, fd, *backing_file_out, + sectorsize, align, &size); + if (err) { + printk(KERN_ERR "Switch failed, errno = %d\n", -err); + goto out_close; + } + } else { + *backing_file_out = backing_file; + err = backing_file_mismatch(*backing_file_out, size, mtime); + if (err) + goto out_close; + } + + cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, + bitmap_len_out, data_offset_out); + + return fd; + out_close: + os_close_file(fd); + return err; +} + +static int create_cow_file(char *cow_file, char *backing_file, + struct openflags flags, + int sectorsize, int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out) +{ + int err, fd; + + flags.c = 1; + fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL); + if (fd < 0) { + err = fd; + printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n", + cow_file, -err); + goto out; + } + + err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, + bitmap_offset_out, bitmap_len_out, + data_offset_out); + if (!err) + return fd; + os_close_file(fd); + out: + return err; +} + static void ubd_close_dev(struct ubd *ubd_dev) { os_close_file(ubd_dev->fd); @@ -1166,185 +1340,6 @@ static int ubd_ioctl(struct inode * inode, struct file * file, return -EINVAL; } -static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow) -{ - struct uml_stat buf1, buf2; - int err; - - if(from_cmdline == NULL) - return 0; - if(!strcmp(from_cmdline, from_cow)) - return 0; - - err = os_stat_file(from_cmdline, &buf1); - if(err < 0){ - printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err); - return 0; - } - err = os_stat_file(from_cow, &buf2); - if(err < 0){ - printk("Couldn't stat '%s', err = %d\n", from_cow, -err); - return 1; - } - if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) - return 0; - - printk("Backing file mismatch - \"%s\" requested,\n" - "\"%s\" specified in COW header of \"%s\"\n", - from_cmdline, from_cow, cow); - return 1; -} - -static int backing_file_mismatch(char *file, __u64 size, time_t mtime) -{ - unsigned long modtime; - unsigned long long actual; - int err; - - err = os_file_modtime(file, &modtime); - if(err < 0){ - printk("Failed to get modification time of backing file " - "\"%s\", err = %d\n", file, -err); - return err; - } - - err = os_file_size(file, &actual); - if(err < 0){ - printk("Failed to get size of backing file \"%s\", " - "err = %d\n", file, -err); - return err; - } - - if(actual != size){ - /*__u64 can be a long on AMD64 and with %lu GCC complains; so - * the typecast.*/ - printk("Size mismatch (%llu vs %llu) of COW header vs backing " - "file\n", (unsigned long long) size, actual); - return -EINVAL; - } - if(modtime != mtime){ - printk("mtime mismatch (%ld vs %ld) of COW header vs backing " - "file\n", mtime, modtime); - return -EINVAL; - } - return 0; -} - -int read_cow_bitmap(int fd, void *buf, int offset, int len) -{ - int err; - - err = os_seek_file(fd, offset); - if(err < 0) - return err; - - err = os_read_file(fd, buf, len); - if(err < 0) - return err; - - return 0; -} - -int open_ubd_file(char *file, struct openflags *openflags, int shared, - char **backing_file_out, int *bitmap_offset_out, - unsigned long *bitmap_len_out, int *data_offset_out, - int *create_cow_out) -{ - time_t mtime; - unsigned long long size; - __u32 version, align; - char *backing_file; - int fd, err, sectorsize, asked_switch, mode = 0644; - - fd = os_open_file(file, *openflags, mode); - if (fd < 0) { - if ((fd == -ENOENT) && (create_cow_out != NULL)) - *create_cow_out = 1; - if (!openflags->w || - ((fd != -EROFS) && (fd != -EACCES))) - return fd; - openflags->w = 0; - fd = os_open_file(file, *openflags, mode); - if (fd < 0) - return fd; - } - - if(shared) - printk("Not locking \"%s\" on the host\n", file); - else { - err = os_lock_file(fd, openflags->w); - if(err < 0){ - printk("Failed to lock '%s', err = %d\n", file, -err); - goto out_close; - } - } - - /* Successful return case! */ - if(backing_file_out == NULL) - return fd; - - err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, - &size, §orsize, &align, bitmap_offset_out); - if(err && (*backing_file_out != NULL)){ - printk("Failed to read COW header from COW file \"%s\", " - "errno = %d\n", file, -err); - goto out_close; - } - if(err) - return fd; - - asked_switch = path_requires_switch(*backing_file_out, backing_file, file); - - /* Allow switching only if no mismatch. */ - if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) { - printk("Switching backing file to '%s'\n", *backing_file_out); - err = write_cow_header(file, fd, *backing_file_out, - sectorsize, align, &size); - if (err) { - printk("Switch failed, errno = %d\n", -err); - goto out_close; - } - } else { - *backing_file_out = backing_file; - err = backing_file_mismatch(*backing_file_out, size, mtime); - if (err) - goto out_close; - } - - cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, - bitmap_len_out, data_offset_out); - - return fd; - out_close: - os_close_file(fd); - return err; -} - -int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, - int sectorsize, int alignment, int *bitmap_offset_out, - unsigned long *bitmap_len_out, int *data_offset_out) -{ - int err, fd; - - flags.c = 1; - fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL); - if(fd < 0){ - err = fd; - printk("Open of COW file '%s' failed, errno = %d\n", cow_file, - -err); - goto out; - } - - err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, - bitmap_offset_out, bitmap_len_out, - data_offset_out); - if(!err) - return fd; - os_close_file(fd); - out: - return err; -} - static int update_bitmap(struct io_thread_req *req) { int n; @@ -1369,7 +1364,7 @@ static int update_bitmap(struct io_thread_req *req) return 0; } -void do_io(struct io_thread_req *req) +static void do_io(struct io_thread_req *req) { char *buf; unsigned long len; diff --git a/arch/um/include/chan_kern.h b/arch/um/include/chan_kern.h index 624b5100a3c..1e651457e04 100644 --- a/arch/um/include/chan_kern.h +++ b/arch/um/include/chan_kern.h @@ -31,7 +31,6 @@ extern void chan_interrupt(struct list_head *chans, struct delayed_work *task, struct tty_struct *tty, int irq); extern int parse_chan_pair(char *str, struct line *line, int device, const struct chan_opts *opts, char **error_out); -extern int open_chan(struct list_head *chans); extern int write_chan(struct list_head *chans, const char *buf, int len, int write_irq); extern int console_write_chan(struct list_head *chans, const char *buf, @@ -45,7 +44,6 @@ extern void close_chan(struct list_head *chans, int delay_free_irq); extern int chan_window_size(struct list_head *chans, unsigned short *rows_out, unsigned short *cols_out); -extern int chan_out_fd(struct list_head *chans); extern int chan_config_string(struct list_head *chans, char *str, int size, char **error_out); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index a6c1dd1cf5a..56deed62344 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -115,7 +115,7 @@ static int have_root __initdata = 0; /* Set in uml_mem_setup and modified in linux_main */ long long physmem_size = 32 * 1024 * 1024; -static char *usage_string = +static const char *usage_string = "User Mode Linux v%s\n" " available at http://user-mode-linux.sourceforge.net/\n\n"; @@ -202,7 +202,7 @@ static void __init uml_checksetup(char *line, int *add) p = &__uml_setup_start; while (p < &__uml_setup_end) { - int n; + size_t n; n = strlen(p->str); if (!strncmp(line, p->str, n) && p->setup_func(line + n, add)) @@ -258,7 +258,8 @@ int __init linux_main(int argc, char **argv) { unsigned long avail, diff; unsigned long virtmem_size, max_physmem; - unsigned int i, add; + unsigned int i; + int add; char * mode; for (i = 1; i < argc; i++) { diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index f4bd349d441..f25c29a12d0 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -14,6 +14,7 @@ #include "os.h" #include "um_malloc.h" #include "user.h" +#include <linux/limits.h> struct helper_data { void (*pre_exec)(void*); diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index b616e15638f..997d01944f9 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -25,15 +25,15 @@ #include "registers.h" #include "skas_ptrace.h" -static int ptrace_child(void) +static void ptrace_child(void) { int ret; /* Calling os_getpid because some libcs cached getpid incorrectly */ int pid = os_getpid(), ppid = getppid(); int sc_result; - change_sig(SIGWINCH, 0); - if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) { + if (change_sig(SIGWINCH, 0) < 0 || + ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) { perror("ptrace"); kill(pid, SIGKILL); } @@ -75,9 +75,8 @@ static void fatal(char *fmt, ...) va_list list; va_start(list, fmt); - vprintf(fmt, list); + vfprintf(stderr, fmt, list); va_end(list); - fflush(stdout); exit(1); } @@ -87,9 +86,8 @@ static void non_fatal(char *fmt, ...) va_list list; va_start(list, fmt); - vprintf(fmt, list); + vfprintf(stderr, fmt, list); va_end(list); - fflush(stdout); } static int start_ptraced_child(void) @@ -495,7 +493,7 @@ int __init parse_iomem(char *str, int *add) driver = str; file = strchr(str,','); if (file == NULL) { - printf("parse_iomem : failed to parse iomem\n"); + fprintf(stderr, "parse_iomem : failed to parse iomem\n"); goto out; } *file = '\0'; diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c index 48d211b3d9a..ccb49b0aff5 100644 --- a/arch/um/os-Linux/sys-i386/task_size.c +++ b/arch/um/os-Linux/sys-i386/task_size.c @@ -88,7 +88,10 @@ unsigned long os_get_task_size(void) sa.sa_handler = segfault; sigemptyset(&sa.sa_mask); sa.sa_flags = SA_NODEFER; - sigaction(SIGSEGV, &sa, &old); + if (sigaction(SIGSEGV, &sa, &old)) { + perror("os_get_task_size"); + exit(1); + } if (!page_ok(bottom)) { fprintf(stderr, "Address 0x%x no good?\n", @@ -110,11 +113,12 @@ unsigned long os_get_task_size(void) out: /* Restore the old SIGSEGV handling */ - sigaction(SIGSEGV, &old, NULL); - + if (sigaction(SIGSEGV, &old, NULL)) { + perror("os_get_task_size"); + exit(1); + } top <<= UM_KERN_PAGE_SHIFT; printf("0x%x\n", top); - fflush(stdout); return top; } diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 964dc1a04c3..598b5c1903a 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -6,7 +6,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \ sys_call_table.o tls.o -subarch-obj-y = lib/bitops_32.o lib/semaphore_32.o lib/string_32.o +subarch-obj-y = lib/semaphore_32.o lib/string_32.o subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 3c22de53208..c8b4cce9cfe 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -10,7 +10,7 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \ obj-$(CONFIG_MODULES) += um_module.o -subarch-obj-y = lib/bitops_64.o lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o +subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o ldt-y = ../sys-i386/ldt.o diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4d350b5cbc7..a12dbb2b93f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -86,9 +86,6 @@ config GENERIC_GPIO config ARCH_MAY_HAVE_PC_FDC def_bool y -config DMI - def_bool y - config RWSEM_GENERIC_SPINLOCK def_bool !X86_XADD @@ -114,6 +111,9 @@ config GENERIC_TIME_VSYSCALL config ARCH_HAS_CPU_RELAX def_bool y +config ARCH_HAS_CACHE_LINE_SIZE + def_bool y + config HAVE_SETUP_PER_CPU_AREA def_bool X86_64 || (X86_SMP && !X86_VOYAGER) @@ -142,6 +142,9 @@ config AUDIT_ARCH config ARCH_SUPPORTS_AOUT def_bool y +config ARCH_SUPPORTS_OPTIMIZED_INLINING + def_bool y + # Use the generic interrupt handling code in kernel/irq/: config GENERIC_HARDIRQS bool @@ -370,6 +373,25 @@ config VMI at the moment), by linking the kernel to a GPL-ed ROM module provided by the hypervisor. +config KVM_CLOCK + bool "KVM paravirtualized clock" + select PARAVIRT + depends on !(X86_VISWS || X86_VOYAGER) + help + Turning on this option will allow you to run a paravirtualized clock + when running over the KVM hypervisor. Instead of relying on a PIT + (or probably other) emulation by the underlying device model, the host + provides the guest with timing infrastructure such as time of day, and + system time + +config KVM_GUEST + bool "KVM Guest support" + select PARAVIRT + depends on !(X86_VISWS || X86_VOYAGER) + help + This option enables various optimizations for running under the KVM + hypervisor. + source "arch/x86/lguest/Kconfig" config PARAVIRT @@ -460,6 +482,15 @@ config HPET_EMULATE_RTC # Mark as embedded because too many people got it wrong. # The code disables itself when not needed. +config DMI + default y + bool "Enable DMI scanning" if EMBEDDED + help + Enabled scanning of DMI to identify machine quirks. Say Y + here unless you have verified that your setup is not + affected by entries in the DMI blacklist. Required by PNP + BIOS code. + config GART_IOMMU bool "GART IOMMU support" if EMBEDDED default y @@ -1049,9 +1080,9 @@ config MTRR See <file:Documentation/mtrr.txt> for more information. config X86_PAT - def_bool y + bool prompt "x86 PAT support" - depends on MTRR && NONPROMISC_DEVMEM + depends on MTRR help Use PAT attributes to setup page level cache control. diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 57072f2716f..7ef18b01f0b 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -21,8 +21,8 @@ config M386 Here are the settings recommended for greatest speed: - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI - 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels - will run on a 386 class machine. + 486DLC/DLC2, and UMC 486SX-S. Only "386" kernels will run on a 386 + class machine. - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. - "586" for generic Pentium CPUs lacking the TSC @@ -278,6 +278,11 @@ config GENERIC_CPU endchoice +config X86_CPU + def_bool y + select GENERIC_FIND_FIRST_BIT + select GENERIC_FIND_NEXT_BIT + config X86_GENERIC bool "Generic x86 support" depends on X86_32 @@ -398,7 +403,7 @@ config X86_TSC # generates cmov. config X86_CMOV def_bool y - depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7) + depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || X86_64) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 610aaecc19f..5b1979a45a1 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -5,6 +5,17 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" +config NONPROMISC_DEVMEM + bool "Disable promiscuous /dev/mem" + help + The /dev/mem file by default only allows userspace access to PCI + space and the BIOS code and data regions. This is sufficient for + dosemu and X and all common users of /dev/mem. With this config + option, you allow userspace access to all of memory, including + kernel and userspace memory. Accidental access to this is + obviously disasterous, but specific access can be used by people + debugging the kernel. + config EARLY_PRINTK bool "Early printk" if EMBEDDED default y @@ -246,3 +257,16 @@ config CPA_DEBUG Do change_page_attr() self-tests every 30 seconds. endmenu + +config OPTIMIZE_INLINING + bool "Allow gcc to uninline functions marked 'inline'" + default y + help + This option determines if the kernel forces gcc to inline the functions + developers have marked 'inline'. Doing so takes away freedom from gcc to + do what it thinks is best, which is desirable for the gcc 3.x series of + compilers. The gcc 4.x series have a rewritten inlining algorithm and + disabling this option will generate a smaller kernel there. Hopefully + this algorithm is so good that allowing gcc4 to make the decision can + become the default in the future, until then this option is there to + test gcc for this. diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore index b1bdc4c6f9f..172cf8a98bd 100644 --- a/arch/x86/boot/.gitignore +++ b/arch/x86/boot/.gitignore @@ -1,7 +1,8 @@ bootsect bzImage +cpustr.h +mkcpustr +offsets.h setup setup.bin setup.elf -cpustr.h -mkcpustr diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 6d2df8d61c5..af86e431acf 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -120,7 +120,7 @@ _start: # Part 2 of the header, from the old setup.S .ascii "HdrS" # header signature - .word 0x0208 # header version number (>= 0x0105) + .word 0x0209 # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG @@ -227,6 +227,10 @@ hardware_subarch_data: .quad 0 payload_offset: .long input_data payload_length: .long input_data_end-input_data +setup_data: .quad 0 # 64-bit physical pointer to + # single linked list of + # struct setup_data + # End of setup header ##################################################### .section ".inittext", "ax" diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 3df340b54e5..ad7ddaaff58 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1421,6 +1421,7 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set # CONFIG_FRAME_POINTER is not set +CONFIG_OPTIMIZE_INLINING=y # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index eef98cb00c6..2d6f5b2809d 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1346,6 +1346,7 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set # CONFIG_FRAME_POINTER is not set +CONFIG_OPTIMIZE_INLINING=y # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 05e155d3fb6..bbed3a26ce5 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -499,11 +499,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, regs->cs = __USER32_CS; regs->ss = __USER32_DS; - set_fs(USER_DS); - regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - #if DEBUG_SIG printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", current->comm, current->pid, frame, regs->ip, frame->pretcode); @@ -599,11 +594,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->cs = __USER32_CS; regs->ss = __USER32_DS; - set_fs(USER_DS); - regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - #if DEBUG_SIG printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", current->comm, current->pid, frame, regs->ip, frame->pretcode); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index ae7158bce4d..b5e329da166 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -430,7 +430,7 @@ ia32_sys_call_table: .quad sys_setuid16 .quad sys_getuid16 .quad compat_sys_stime /* stime */ /* 25 */ - .quad sys32_ptrace /* ptrace */ + .quad compat_sys_ptrace /* ptrace */ .quad sys_alarm .quad sys_fstat /* (old)fstat */ .quad sys_pause diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 90e092d0af0..fa19c381954 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -80,6 +80,8 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o +obj-$(CONFIG_KVM_GUEST) += kvm.o +obj-$(CONFIG_KVM_CLOCK) += kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o ifdef CONFIG_INPUT_PCSPKR diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 057ccf1d5ad..977ed5cdeaa 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -697,10 +697,6 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) #define HPET_RESOURCE_NAME_SIZE 9 hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); - if (!hpet_res) - return 0; - - memset(hpet_res, 0, sizeof(*hpet_res)); hpet_res->name = (void *)&hpet_res[1]; hpet_res->flags = IORESOURCE_MEM; snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u", diff --git a/arch/x86/kernel/acpi/realmode/.gitignore b/arch/x86/kernel/acpi/realmode/.gitignore new file mode 100644 index 00000000000..58f1f48a58f --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/.gitignore @@ -0,0 +1,3 @@ +wakeup.bin +wakeup.elf +wakeup.lds diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index df4099dc1c6..65c7857a90d 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -511,31 +511,30 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) unsigned long flags; char *vaddr; int nr_pages = 2; + struct page *pages[2]; + int i; - BUG_ON(len > sizeof(long)); - BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1)) - - ((long)addr & ~(sizeof(long) - 1))); - if (kernel_text_address((unsigned long)addr)) { - struct page *pages[2] = { virt_to_page(addr), - virt_to_page(addr + PAGE_SIZE) }; - if (!pages[1]) - nr_pages = 1; - vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); - BUG_ON(!vaddr); - local_irq_save(flags); - memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - local_irq_restore(flags); - vunmap(vaddr); + if (!core_kernel_text((unsigned long)addr)) { + pages[0] = vmalloc_to_page(addr); + pages[1] = vmalloc_to_page(addr + PAGE_SIZE); } else { - /* - * modules are in vmalloc'ed memory, always writable. - */ - local_irq_save(flags); - memcpy(addr, opcode, len); - local_irq_restore(flags); + pages[0] = virt_to_page(addr); + WARN_ON(!PageReserved(pages[0])); + pages[1] = virt_to_page(addr + PAGE_SIZE); } + BUG_ON(!pages[0]); + if (!pages[1]) + nr_pages = 1; + vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); + BUG_ON(!vaddr); + local_irq_save(flags); + memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); + local_irq_restore(flags); + vunmap(vaddr); sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. */ + for (i = 0; i < len; i++) + BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); return addr; } diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 8317401170b..4b99b1bdeb6 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -451,7 +451,8 @@ void __init setup_boot_APIC_clock(void) } /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32); + lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, + lapic_clockevent.shift); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); lapic_clockevent.min_delta_ns = diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index bf83157337e..5910020c3f2 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -360,7 +360,8 @@ static void __init calibrate_APIC_clock(void) result / 1000 / 1000, result / 1000 % 1000); /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, + lapic_clockevent.shift); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); lapic_clockevent.min_delta_ns = diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index f0030a0999c..e4ea362e848 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -904,6 +904,7 @@ recalc: original_pm_idle(); else default_idle(); + local_irq_disable(); jiffies_since_last_check = jiffies - last_jiffies; if (jiffies_since_last_check > idle_period) goto recalc; @@ -911,6 +912,8 @@ recalc: if (apm_idle_done) apm_do_busy(); + + local_irq_enable(); } /** diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ee7c45235e5..a0c6f819088 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o obj-$(CONFIG_X86_32) += transmeta.o obj-$(CONFIG_X86_32) += intel.o -obj-$(CONFIG_X86_32) += nexgen.o obj-$(CONFIG_X86_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0173065dc3b..24586682829 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -343,10 +343,4 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_size_cache = amd_size_cache, }; -int __init amd_init_cpu(void) -{ - cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; - return 0; -} - cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index e2d870de837..8db8f73503b 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -339,6 +339,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); unsigned int freq; + unsigned int cached_freq; dprintk("get_cur_freq_on_cpu (%d)\n", cpu); @@ -347,7 +348,16 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return 0; } + cached_freq = data->freq_table[data->acpi_data->state].frequency; freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); + if (freq != cached_freq) { + /* + * The dreaded BIOS frequency change behind our back. + * Force set the frequency on next target call. + */ + data->resume = 1; + } + dprintk("cur freq = %u\n", freq); return freq; diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 9a699ed0359..e07e8c068ae 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -49,7 +49,7 @@ static int banks; static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; static unsigned long notify_user; static int rip_msr; -static int mce_bootlog = 1; +static int mce_bootlog = -1; static atomic_t mce_events; static char trigger[128]; @@ -471,13 +471,15 @@ static void mce_init(void *dummy) static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) { /* This should be disabled by the BIOS, but isn't always */ - if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) { - /* disable GART TBL walk error reporting, which trips off - incorrectly with the IOMMU & 3ware & Cerberus. */ - clear_bit(10, &bank[4]); - /* Lots of broken BIOS around that don't clear them - by default and leave crap in there. Don't log. */ - mce_bootlog = 0; + if (c->x86_vendor == X86_VENDOR_AMD) { + if(c->x86 == 15) + /* disable GART TBL walk error reporting, which trips off + incorrectly with the IOMMU & 3ware & Cerberus. */ + clear_bit(10, &bank[4]); + if(c->x86 <= 17 && mce_bootlog < 0) + /* Lots of broken BIOS around that don't clear them + by default and leave crap in there. Don't log. */ + mce_bootlog = 0; } } diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c deleted file mode 100644 index 5d5e1c13412..00000000000 --- a/arch/x86/kernel/cpu/nexgen.c +++ /dev/null @@ -1,59 +0,0 @@ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/string.h> -#include <asm/processor.h> - -#include "cpu.h" - -/* - * Detect a NexGen CPU running without BIOS hypercode new enough - * to have CPUID. (Thanks to Herbert Oppmann) - */ - -static int __cpuinit deep_magic_nexgen_probe(void) -{ - int ret; - - __asm__ __volatile__ ( - " movw $0x5555, %%ax\n" - " xorw %%dx,%%dx\n" - " movw $2, %%cx\n" - " divw %%cx\n" - " movl $0, %%eax\n" - " jnz 1f\n" - " movl $1, %%eax\n" - "1:\n" - : "=a" (ret) : : "cx", "dx"); - return ret; -} - -static void __cpuinit init_nexgen(struct cpuinfo_x86 *c) -{ - c->x86_cache_size = 256; /* A few had 1 MB... */ -} - -static void __cpuinit nexgen_identify(struct cpuinfo_x86 *c) -{ - /* Detect NexGen with old hypercode */ - if (deep_magic_nexgen_probe()) - strcpy(c->x86_vendor_id, "NexGenDriven"); -} - -static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { - .c_vendor = "Nexgen", - .c_ident = { "NexGenDriven" }, - .c_models = { - { .vendor = X86_VENDOR_NEXGEN, - .family = 5, - .model_names = { [1] = "Nx586" } - }, - }, - .c_init = init_nexgen, - .c_identify = nexgen_identify, -}; - -int __init nexgen_init_cpu(void) -{ - cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; - return 0; -} diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index b943e10ad81..f9ae93adffe 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -614,16 +614,6 @@ static struct wd_ops intel_arch_wd_ops __read_mostly = { .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, }; -static struct wd_ops coreduo_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, -}; - static void probe_nmi_watchdog(void) { switch (boot_cpu_data.x86_vendor) { @@ -637,8 +627,8 @@ static void probe_nmi_watchdog(void) /* Work around Core Duo (Yonah) errata AE49 where perfctr1 doesn't have a working enable bit. */ if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { - wd_ops = &coreduo_wd_ops; - break; + intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; + intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; } if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { wd_ops = &intel_arch_wd_ops; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 2251d0ae957..26855381790 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -25,6 +25,7 @@ #include <asm/hpet.h> #include <linux/kdebug.h> #include <asm/smp.h> +#include <asm/reboot.h> #include <mach_ipi.h> @@ -117,7 +118,7 @@ static void nmi_shootdown_cpus(void) } #endif -void machine_crash_shutdown(struct pt_regs *regs) +void native_machine_crash_shutdown(struct pt_regs *regs) { /* This function is only called after the system * has panicked or is otherwise in a critical state. diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index cbd42e51cb0..645ee5e32a2 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -84,14 +84,41 @@ void __init reserve_early(unsigned long start, unsigned long end, char *name) strncpy(r->name, name, sizeof(r->name) - 1); } -void __init early_res_to_bootmem(void) +void __init free_early(unsigned long start, unsigned long end) +{ + struct early_res *r; + int i, j; + + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if (start == r->start && end == r->end) + break; + } + if (i >= MAX_EARLY_RES || !early_res[i].end) + panic("free_early on not reserved area: %lx-%lx!", start, end); + + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) + ; + + memcpy(&early_res[i], &early_res[i + 1], + (j - 1 - i) * sizeof(struct early_res)); + + early_res[j - 1].end = 0; +} + +void __init early_res_to_bootmem(unsigned long start, unsigned long end) { int i; + unsigned long final_start, final_end; for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { struct early_res *r = &early_res[i]; - printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, - r->start, r->end - 1, r->name); - reserve_bootmem_generic(r->start, r->end - r->start); + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) + continue; + printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, + final_start, final_end - 1, r->name); + reserve_bootmem_generic(final_start, final_end - final_start); } } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f0f8934fc30..2a609dc3271 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -409,7 +409,7 @@ restore_nocheck_notrace: irq_return: INTERRUPT_RETURN .section .fixup,"ax" -iret_exc: +ENTRY(iret_exc) pushl $0 # no error code pushl $do_iret_error jmp error_code @@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper) ENDPROC(kernel_thread_helper) #ifdef CONFIG_XEN +/* Xen doesn't set %esp to be precisely what the normal sysenter + entrypoint expects, so fix it up before using the normal path. */ +ENTRY(xen_sysenter_target) + RING0_INT_FRAME + addl $5*4, %esp /* remove xen-provided frame */ + jmp sysenter_past_esp + ENTRY(xen_hypervisor_callback) CFI_STARTPROC pushl $0 @@ -1035,8 +1042,9 @@ ENTRY(xen_hypervisor_callback) cmpl $xen_iret_end_crit,%eax jae 1f - call xen_iret_crit_fixup + jmp xen_iret_crit_fixup +ENTRY(xen_do_upcall) 1: mov %esp, %eax call xen_evtchn_do_upcall jmp ret_from_intr diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 9546ef408b9..021624c8358 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -51,7 +51,7 @@ void __init setup_apic_routing(void) else #endif - if (cpus_weight(cpu_possible_map) <= 8) + if (num_possible_cpus() <= 8) genapic = &apic_flat; else genapic = &apic_physflat; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 993c7677325..e25c57b8aa8 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -11,6 +11,7 @@ #include <linux/string.h> #include <linux/percpu.h> #include <linux/start_kernel.h> +#include <linux/io.h> #include <asm/processor.h> #include <asm/proto.h> @@ -22,6 +23,7 @@ #include <asm/sections.h> #include <asm/kdebug.h> #include <asm/e820.h> +#include <asm/bios_ebda.h> static void __init zap_identity_mappings(void) { @@ -49,7 +51,6 @@ static void __init copy_bootdata(char *real_mode_data) } } -#define BIOS_EBDA_SEGMENT 0x40E #define BIOS_LOWMEM_KILOBYTES 0x413 /* @@ -80,8 +81,7 @@ static void __init reserve_ebda_region(void) lowmem <<= 10; /* start of EBDA area */ - ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT); - ebda_addr <<= 4; + ebda_addr = get_bios_ebda(); /* Fixup: bios puts an EBDA in the top 64K segment */ /* of conventional memory, but does not adjust lowmem. */ @@ -101,6 +101,24 @@ static void __init reserve_ebda_region(void) reserve_early(lowmem, 0x100000, "BIOS reserved"); } +static void __init reserve_setup_data(void) +{ + struct setup_data *data; + unsigned long pa_data; + char buf[32]; + + if (boot_params.hdr.version < 0x0209) + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, sizeof(*data)); + sprintf(buf, "setup data %x", data->type); + reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); + pa_data = data->next; + early_iounmap(data, sizeof(*data)); + } +} + void __init x86_64_start_kernel(char * real_mode_data) { int i; @@ -157,6 +175,7 @@ void __init x86_64_start_kernel(char * real_mode_data) #endif reserve_ebda_region(); + reserve_setup_data(); /* * At this point everything still needed from the boot loader diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 36652ea1a26..9007f9ea64e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -218,7 +218,7 @@ static void hpet_legacy_clockevent_register(void) hpet_freq = 1000000000000000ULL; do_div(hpet_freq, hpet_period); hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, - NSEC_PER_SEC, 32); + NSEC_PER_SEC, hpet_clockevent.shift); /* Calculate the min / max delta */ hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &hpet_clockevent); diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 8540abe86ad..c1b5e3ece1f 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -115,7 +115,8 @@ void __init setup_pit_timer(void) * IO_APIC has been initialized. */ pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); - pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32); + pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, + pit_clockevent.shift); pit_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_clockevent); pit_clockevent.min_delta_ns = @@ -224,7 +225,8 @@ static int __init init_pit_clocksource(void) pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) return 0; - clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); + clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, + clocksource_pit.shift); return clocksource_register(&clocksource_pit); } arch_initcall(init_pit_clocksource); diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 2e2f42074e1..a40d54fc1fd 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -2068,7 +2068,7 @@ static void __init setup_nmi(void) * cycles as some i82489DX-based boards have glue logic that keeps the * 8259A interrupt line asserted until INTA. --macro */ -static inline void unlock_ExtINT_logic(void) +static inline void __init unlock_ExtINT_logic(void) { int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; @@ -2444,6 +2444,7 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); spin_lock_irqsave(&vector_lock, flags); + clear_bit(irq_vector[irq], used_vectors); irq_vector[irq] = 0; spin_unlock_irqrestore(&vector_lock, flags); } diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9ba11d07920..ef1a8dfcc52 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1599,7 +1599,7 @@ static void __init setup_nmi(void) * cycles as some i82489DX-based boards have glue logic that keeps the * 8259A interrupt line asserted until INTA. --macro */ -static inline void unlock_ExtINT_logic(void) +static inline void __init unlock_ExtINT_logic(void) { int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 6ea67b76a21..00bda7bcda6 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs) : "=a" (arg1), "=d" (arg2), "=b" (bx) : "0" (irq), "1" (desc), "2" (isp), "D" (desc->handle_irq) - : "memory", "cc" + : "memory", "cc", "ecx" ); } else #endif diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 73354302fda..c0320599171 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -6,23 +6,171 @@ * * This file is released under the GPLv2. */ - #include <linux/debugfs.h> +#include <linux/uaccess.h> #include <linux/stat.h> #include <linux/init.h> +#include <linux/io.h> +#include <linux/mm.h> #include <asm/setup.h> #ifdef CONFIG_DEBUG_BOOT_PARAMS +struct setup_data_node { + u64 paddr; + u32 type; + u32 len; +}; + +static ssize_t +setup_data_read(struct file *file, char __user *user_buf, size_t count, + loff_t *ppos) +{ + struct setup_data_node *node = file->private_data; + unsigned long remain; + loff_t pos = *ppos; + struct page *pg; + void *p; + u64 pa; + + if (pos < 0) + return -EINVAL; + if (pos >= node->len) + return 0; + + if (count > node->len - pos) + count = node->len - pos; + pa = node->paddr + sizeof(struct setup_data) + pos; + pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT); + if (PageHighMem(pg)) { + p = ioremap_cache(pa, count); + if (!p) + return -ENXIO; + } else { + p = __va(pa); + } + + remain = copy_to_user(user_buf, p, count); + + if (PageHighMem(pg)) + iounmap(p); + + if (remain) + return -EFAULT; + + *ppos = pos + count; + + return count; +} + +static int setup_data_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +static const struct file_operations fops_setup_data = { + .read = setup_data_read, + .open = setup_data_open, +}; + +static int __init +create_setup_data_node(struct dentry *parent, int no, + struct setup_data_node *node) +{ + struct dentry *d, *type, *data; + char buf[16]; + int error; + + sprintf(buf, "%d", no); + d = debugfs_create_dir(buf, parent); + if (!d) { + error = -ENOMEM; + goto err_return; + } + type = debugfs_create_x32("type", S_IRUGO, d, &node->type); + if (!type) { + error = -ENOMEM; + goto err_dir; + } + data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data); + if (!data) { + error = -ENOMEM; + goto err_type; + } + return 0; + +err_type: + debugfs_remove(type); +err_dir: + debugfs_remove(d); +err_return: + return error; +} + +static int __init create_setup_data_nodes(struct dentry *parent) +{ + struct setup_data_node *node; + struct setup_data *data; + int error, no = 0; + struct dentry *d; + struct page *pg; + u64 pa_data; + + d = debugfs_create_dir("setup_data", parent); + if (!d) { + error = -ENOMEM; + goto err_return; + } + + pa_data = boot_params.hdr.setup_data; + + while (pa_data) { + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) { + error = -ENOMEM; + goto err_dir; + } + pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); + if (PageHighMem(pg)) { + data = ioremap_cache(pa_data, sizeof(*data)); + if (!data) { + error = -ENXIO; + goto err_dir; + } + } else { + data = __va(pa_data); + } + + node->paddr = pa_data; + node->type = data->type; + node->len = data->len; + error = create_setup_data_node(d, no, node); + pa_data = data->next; + + if (PageHighMem(pg)) + iounmap(data); + if (error) + goto err_dir; + no++; + } + return 0; + +err_dir: + debugfs_remove(d); +err_return: + return error; +} + static struct debugfs_blob_wrapper boot_params_blob = { - .data = &boot_params, - .size = sizeof(boot_params), + .data = &boot_params, + .size = sizeof(boot_params), }; static int __init boot_params_kdebugfs_init(void) { - int error; struct dentry *dbp, *version, *data; + int error; dbp = debugfs_create_dir("boot_params", NULL); if (!dbp) { @@ -41,7 +189,13 @@ static int __init boot_params_kdebugfs_init(void) error = -ENOMEM; goto err_version; } + error = create_setup_data_nodes(dbp); + if (error) + goto err_data; return 0; + +err_data: + debugfs_remove(data); err_version: debugfs_remove(version); err_dir: @@ -61,5 +215,4 @@ static int __init arch_kdebugfs_init(void) return error; } - arch_initcall(arch_kdebugfs_init); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c new file mode 100644 index 00000000000..8b7a3cf37d2 --- /dev/null +++ b/arch/x86/kernel/kvm.c @@ -0,0 +1,248 @@ +/* + * KVM paravirt_ops implementation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright IBM Corporation, 2007 + * Authors: Anthony Liguori <aliguori@us.ibm.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/kvm_para.h> +#include <linux/cpu.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/hardirq.h> + +#define MMU_QUEUE_SIZE 1024 + +struct kvm_para_state { + u8 mmu_queue[MMU_QUEUE_SIZE]; + int mmu_queue_len; + enum paravirt_lazy_mode mode; +}; + +static DEFINE_PER_CPU(struct kvm_para_state, para_state); + +static struct kvm_para_state *kvm_para_state(void) +{ + return &per_cpu(para_state, raw_smp_processor_id()); +} + +/* + * No need for any "IO delay" on KVM + */ +static void kvm_io_delay(void) +{ +} + +static void kvm_mmu_op(void *buffer, unsigned len) +{ + int r; + unsigned long a1, a2; + + do { + a1 = __pa(buffer); + a2 = 0; /* on i386 __pa() always returns <4G */ + r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2); + buffer += r; + len -= r; + } while (len); +} + +static void mmu_queue_flush(struct kvm_para_state *state) +{ + if (state->mmu_queue_len) { + kvm_mmu_op(state->mmu_queue, state->mmu_queue_len); + state->mmu_queue_len = 0; + } +} + +static void kvm_deferred_mmu_op(void *buffer, int len) +{ + struct kvm_para_state *state = kvm_para_state(); + + if (state->mode != PARAVIRT_LAZY_MMU) { + kvm_mmu_op(buffer, len); + return; + } + if (state->mmu_queue_len + len > sizeof state->mmu_queue) + mmu_queue_flush(state); + memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len); + state->mmu_queue_len += len; +} + +static void kvm_mmu_write(void *dest, u64 val) +{ + __u64 pte_phys; + struct kvm_mmu_op_write_pte wpte; + +#ifdef CONFIG_HIGHPTE + struct page *page; + unsigned long dst = (unsigned long) dest; + + page = kmap_atomic_to_page(dest); + pte_phys = page_to_pfn(page); + pte_phys <<= PAGE_SHIFT; + pte_phys += (dst & ~(PAGE_MASK)); +#else + pte_phys = (unsigned long)__pa(dest); +#endif + wpte.header.op = KVM_MMU_OP_WRITE_PTE; + wpte.pte_val = val; + wpte.pte_phys = pte_phys; + + kvm_deferred_mmu_op(&wpte, sizeof wpte); +} + +/* + * We only need to hook operations that are MMU writes. We hook these so that + * we can use lazy MMU mode to batch these operations. We could probably + * improve the performance of the host code if we used some of the information + * here to simplify processing of batched writes. + */ +static void kvm_set_pte(pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, pte_val(pte)); +} + +static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, pte_val(pte)); +} + +static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + kvm_mmu_write(pmdp, pmd_val(pmd)); +} + +#if PAGETABLE_LEVELS >= 3 +#ifdef CONFIG_X86_PAE +static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, pte_val(pte)); +} + +static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, pte_val(pte)); +} + +static void kvm_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + kvm_mmu_write(ptep, 0); +} + +static void kvm_pmd_clear(pmd_t *pmdp) +{ + kvm_mmu_write(pmdp, 0); +} +#endif + +static void kvm_set_pud(pud_t *pudp, pud_t pud) +{ + kvm_mmu_write(pudp, pud_val(pud)); +} + +#if PAGETABLE_LEVELS == 4 +static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + kvm_mmu_write(pgdp, pgd_val(pgd)); +} +#endif +#endif /* PAGETABLE_LEVELS >= 3 */ + +static void kvm_flush_tlb(void) +{ + struct kvm_mmu_op_flush_tlb ftlb = { + .header.op = KVM_MMU_OP_FLUSH_TLB, + }; + + kvm_deferred_mmu_op(&ftlb, sizeof ftlb); +} + +static void kvm_release_pt(u32 pfn) +{ + struct kvm_mmu_op_release_pt rpt = { + .header.op = KVM_MMU_OP_RELEASE_PT, + .pt_phys = (u64)pfn << PAGE_SHIFT, + }; + + kvm_mmu_op(&rpt, sizeof rpt); +} + +static void kvm_enter_lazy_mmu(void) +{ + struct kvm_para_state *state = kvm_para_state(); + + paravirt_enter_lazy_mmu(); + state->mode = paravirt_get_lazy_mode(); +} + +static void kvm_leave_lazy_mmu(void) +{ + struct kvm_para_state *state = kvm_para_state(); + + mmu_queue_flush(state); + paravirt_leave_lazy(paravirt_get_lazy_mode()); + state->mode = paravirt_get_lazy_mode(); +} + +static void paravirt_ops_setup(void) +{ + pv_info.name = "KVM"; + pv_info.paravirt_enabled = 1; + + if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) + pv_cpu_ops.io_delay = kvm_io_delay; + + if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { + pv_mmu_ops.set_pte = kvm_set_pte; + pv_mmu_ops.set_pte_at = kvm_set_pte_at; + pv_mmu_ops.set_pmd = kvm_set_pmd; +#if PAGETABLE_LEVELS >= 3 +#ifdef CONFIG_X86_PAE + pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; + pv_mmu_ops.set_pte_present = kvm_set_pte_present; + pv_mmu_ops.pte_clear = kvm_pte_clear; + pv_mmu_ops.pmd_clear = kvm_pmd_clear; +#endif + pv_mmu_ops.set_pud = kvm_set_pud; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.set_pgd = kvm_set_pgd; +#endif +#endif + pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; + pv_mmu_ops.release_pte = kvm_release_pt; + pv_mmu_ops.release_pmd = kvm_release_pt; + pv_mmu_ops.release_pud = kvm_release_pt; + + pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; + pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; + } +} + +void __init kvm_guest_init(void) +{ + if (!kvm_para_available()) + return; + + paravirt_ops_setup(); +} diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c new file mode 100644 index 00000000000..ddee04043ae --- /dev/null +++ b/arch/x86/kernel/kvmclock.c @@ -0,0 +1,187 @@ +/* KVM paravirtual clock driver. A clocksource implementation + Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include <linux/clocksource.h> +#include <linux/kvm_para.h> +#include <asm/arch_hooks.h> +#include <asm/msr.h> +#include <asm/apic.h> +#include <linux/percpu.h> +#include <asm/reboot.h> + +#define KVM_SCALE 22 + +static int kvmclock = 1; + +static int parse_no_kvmclock(char *arg) +{ + kvmclock = 0; + return 0; +} +early_param("no-kvmclock", parse_no_kvmclock); + +/* The hypervisor will put information about time periodically here */ +static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); +#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field + +static inline u64 kvm_get_delta(u64 last_tsc) +{ + int cpu = smp_processor_id(); + u64 delta = native_read_tsc() - last_tsc; + return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; +} + +static struct kvm_wall_clock wall_clock; +static cycle_t kvm_clock_read(void); +/* + * The wallclock is the time of day when we booted. Since then, some time may + * have elapsed since the hypervisor wrote the data. So we try to account for + * that with system time + */ +unsigned long kvm_get_wallclock(void) +{ + u32 wc_sec, wc_nsec; + u64 delta; + struct timespec ts; + int version, nsec; + int low, high; + + low = (int)__pa(&wall_clock); + high = ((u64)__pa(&wall_clock) >> 32); + + delta = kvm_clock_read(); + + native_write_msr(MSR_KVM_WALL_CLOCK, low, high); + do { + version = wall_clock.wc_version; + rmb(); + wc_sec = wall_clock.wc_sec; + wc_nsec = wall_clock.wc_nsec; + rmb(); + } while ((wall_clock.wc_version != version) || (version & 1)); + + delta = kvm_clock_read() - delta; + delta += wc_nsec; + nsec = do_div(delta, NSEC_PER_SEC); + set_normalized_timespec(&ts, wc_sec + delta, nsec); + /* + * Of all mechanisms of time adjustment I've tested, this one + * was the champion! + */ + return ts.tv_sec + 1; +} + +int kvm_set_wallclock(unsigned long now) +{ + return 0; +} + +/* + * This is our read_clock function. The host puts an tsc timestamp each time + * it updates a new time. Without the tsc adjustment, we can have a situation + * in which a vcpu starts to run earlier (smaller system_time), but probes + * time later (compared to another vcpu), leading to backwards time + */ +static cycle_t kvm_clock_read(void) +{ + u64 last_tsc, now; + int cpu; + + preempt_disable(); + cpu = smp_processor_id(); + + last_tsc = get_clock(cpu, tsc_timestamp); + now = get_clock(cpu, system_time); + + now += kvm_get_delta(last_tsc); + preempt_enable(); + + return now; +} +static struct clocksource kvm_clock = { + .name = "kvm-clock", + .read = kvm_clock_read, + .rating = 400, + .mask = CLOCKSOURCE_MASK(64), + .mult = 1 << KVM_SCALE, + .shift = KVM_SCALE, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static int kvm_register_clock(void) +{ + int cpu = smp_processor_id(); + int low, high; + low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; + high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); + + return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); +} + +static void kvm_setup_secondary_clock(void) +{ + /* + * Now that the first cpu already had this clocksource initialized, + * we shouldn't fail. + */ + WARN_ON(kvm_register_clock()); + /* ok, done with our trickery, call native */ + setup_secondary_APIC_clock(); +} + +/* + * After the clock is registered, the host will keep writing to the + * registered memory location. If the guest happens to shutdown, this memory + * won't be valid. In cases like kexec, in which you install a new kernel, this + * means a random memory location will be kept being written. So before any + * kind of shutdown from our side, we unregister the clock by writting anything + * that does not have the 'enable' bit set in the msr + */ +#ifdef CONFIG_KEXEC +static void kvm_crash_shutdown(struct pt_regs *regs) +{ + native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); + native_machine_crash_shutdown(regs); +} +#endif + +static void kvm_shutdown(void) +{ + native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); + native_machine_shutdown(); +} + +void __init kvmclock_init(void) +{ + if (!kvm_para_available()) + return; + + if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { + if (kvm_register_clock()) + return; + pv_time_ops.get_wallclock = kvm_get_wallclock; + pv_time_ops.set_wallclock = kvm_set_wallclock; + pv_time_ops.sched_clock = kvm_clock_read; + pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; + machine_ops.shutdown = kvm_shutdown; +#ifdef CONFIG_KEXEC + machine_ops.crash_shutdown = kvm_crash_shutdown; +#endif + clocksource_register(&kvm_clock); + } +} diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index b402c0f3f19..3cad17fe026 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -63,7 +63,7 @@ static int __init mfgpt_fix(char *s) /* The following udocumented bit resets the MFGPT timers */ val = 0xFF; dummy = 0; - wrmsr(0x5140002B, val, dummy); + wrmsr(MSR_MFGPT_SETUP, val, dummy); return 1; } __setup("mfgptfix", mfgpt_fix); @@ -127,17 +127,17 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) * 6; that is, resets for 7 and 8 will be ignored. Is this * a problem? -dilinger */ - msr = MFGPT_NR_MSR; + msr = MSR_MFGPT_NR; mask = 1 << (timer + 24); break; case MFGPT_EVENT_NMI: - msr = MFGPT_NR_MSR; + msr = MSR_MFGPT_NR; mask = 1 << (timer + shift); break; case MFGPT_EVENT_IRQ: - msr = MFGPT_IRQ_MSR; + msr = MSR_MFGPT_IRQ; mask = 1 << (timer + shift); break; @@ -364,7 +364,8 @@ int __init mfgpt_timer_setup(void) geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val); /* Set up the clock event */ - mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, 32); + mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, + mfgpt_clockevent.shift); mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &mfgpt_clockevent); mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE, diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 70744e344fa..3e2c54dc8b2 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -686,13 +686,11 @@ void __init get_smp_config(void) static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned reserve) { - extern void __bad_mpf_size(void); unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); - if (sizeof(*mpf) != 16) - __bad_mpf_size(); + BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { mpf = (struct intel_mp_floating *)bp; @@ -801,7 +799,6 @@ void __init find_smp_config(void) #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 -#define MP_MAX_IOAPIC_PIN 127 extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; @@ -820,7 +817,7 @@ static int mp_find_ioapic(int gsi) return -1; } -static u8 uniq_ioapic_id(u8 id) +static u8 __init uniq_ioapic_id(u8 id) { #ifdef CONFIG_X86_32 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && @@ -909,14 +906,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ intsrc.mpc_dstirq = pin; /* INTIN# */ - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); + MP_intsrc_info(&intsrc); } int es7000_plat; @@ -985,23 +975,14 @@ void __init mp_config_acpi_legacy_irqs(void) intsrc.mpc_srcbusirq = i; /* Identity mapped */ intsrc.mpc_dstirq = i; - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, - intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); + MP_intsrc_info(&intsrc); } } int mp_register_gsi(u32 gsi, int triggering, int polarity) { - int ioapic = -1; - int ioapic_pin = 0; - int idx, bit = 0; + int ioapic; + int ioapic_pin; #ifdef CONFIG_X86_32 #define MAX_GSI_NUM 4096 #define IRQ_COMPRESSION_START 64 @@ -1041,15 +1022,13 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) * with redundant pin->gsi mappings (but unique PCI devices); * we only program the IOAPIC on the first. */ - bit = ioapic_pin % 32; - idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32); - if (idx > 3) { + if (ioapic_pin > MP_MAX_IOAPIC_PIN) { printk(KERN_ERR "Invalid reference to IOAPIC pin " "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); return gsi; } - if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { + if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); #ifdef CONFIG_X86_32 @@ -1059,7 +1038,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) #endif } - mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit); + set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); #ifdef CONFIG_X86_32 /* * For GSI >= 64, use IRQ compression diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 3733412d135..74f0c5ea2a0 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -366,11 +366,13 @@ struct pv_mmu_ops pv_mmu_ops = { .flush_tlb_single = native_flush_tlb_single, .flush_tlb_others = native_flush_tlb_others, - .alloc_pt = paravirt_nop, - .alloc_pd = paravirt_nop, - .alloc_pd_clone = paravirt_nop, - .release_pt = paravirt_nop, - .release_pd = paravirt_nop, + .alloc_pte = paravirt_nop, + .alloc_pmd = paravirt_nop, + .alloc_pmd_clone = paravirt_nop, + .alloc_pud = paravirt_nop, + .release_pte = paravirt_nop, + .release_pmd = paravirt_nop, + .release_pud = paravirt_nop, .set_pte = native_set_pte, .set_pte_at = native_set_pte_at, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 2edee22e9c3..e28ec497e14 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -43,6 +43,7 @@ #include <asm/system.h> #include <asm/dma.h> #include <asm/rio.h> +#include <asm/bios_ebda.h> #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT int use_calgary __read_mostly = 1; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3004d716539..67e9b4a1e89 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -4,6 +4,8 @@ #include <linux/smp.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/module.h> +#include <linux/pm.h> struct kmem_cache *task_xstate_cachep; @@ -42,3 +44,118 @@ void arch_task_cache_init(void) __alignof__(union thread_xstate), SLAB_PANIC, NULL); } + +static void do_nothing(void *unused) +{ +} + +/* + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of + * pm_idle and update to new pm_idle value. Required while changing pm_idle + * handler on SMP systems. + * + * Caller must have changed pm_idle to the new value before the call. Old + * pm_idle value will not be used by any CPU after the return of this function. + */ +void cpu_idle_wait(void) +{ + smp_mb(); + /* kick all the CPUs so that they exit out of pm_idle */ + smp_call_function(do_nothing, NULL, 0, 1); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + +/* + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, + * which can obviate IPI to trigger checking of need_resched. + * We execute MONITOR against need_resched and enter optimized wait state + * through MWAIT. Whenever someone changes need_resched, we would be woken + * up from MWAIT (without an IPI). + * + * New with Core Duo processors, MWAIT can take some hints based on CPU + * capability. + */ +void mwait_idle_with_hints(unsigned long ax, unsigned long cx) +{ + if (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __mwait(ax, cx); + } +} + +/* Default MONITOR/MWAIT with no hints, used for default C1 state */ +static void mwait_idle(void) +{ + if (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __sti_mwait(0, 0); + else + local_irq_enable(); + } else + local_irq_enable(); +} + + +static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) +{ + if (force_mwait) + return 1; + /* Any C1 states supported? */ + return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; +} + +/* + * On SMP it's slightly faster (but much more power-consuming!) + * to poll the ->work.need_resched flag instead of waiting for the + * cross-CPU IPI to arrive. Use this option with caution. + */ +static void poll_idle(void) +{ + local_irq_enable(); + cpu_relax(); +} + +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) +{ + static int selected; + + if (selected) + return; +#ifdef CONFIG_X86_SMP + if (pm_idle == poll_idle && smp_num_siblings > 1) { + printk(KERN_WARNING "WARNING: polling idle and HT enabled," + " performance may degrade.\n"); + } +#endif + if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { + /* + * Skip, if setup has overridden idle. + * One CPU supports mwait => All CPUs supports mwait + */ + if (!pm_idle) { + printk(KERN_INFO "using mwait in idle threads.\n"); + pm_idle = mwait_idle; + } + } + selected = 1; +} + +static int __init idle_setup(char *str) +{ + if (!strcmp(str, "poll")) { + printk("using polling idle threads.\n"); + pm_idle = poll_idle; + } else if (!strcmp(str, "mwait")) + force_mwait = 1; + else + return -1; + + boot_option_idle_override = 1; + return 0; +} +early_param("idle", idle_setup); + diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 77de848bd1f..f8476dfbb60 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -111,12 +111,10 @@ void default_idle(void) */ smp_mb(); - local_irq_disable(); - if (!need_resched()) { + if (!need_resched()) safe_halt(); /* enables interrupts racelessly */ - local_irq_disable(); - } - local_irq_enable(); + else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } else { local_irq_enable(); @@ -128,17 +126,6 @@ void default_idle(void) EXPORT_SYMBOL(default_idle); #endif -/* - * On SMP it's slightly faster (but much more power-consuming!) - * to poll the ->work.need_resched flag instead of waiting for the - * cross-CPU IPI to arrive. Use this option with caution. - */ -static void poll_idle(void) -{ - local_irq_enable(); - cpu_relax(); -} - #ifdef CONFIG_HOTPLUG_CPU #include <asm/nmi.h> /* We don't actually take CPU down, just spin without interrupts. */ @@ -196,6 +183,7 @@ void cpu_idle(void) if (cpu_is_offline(cpu)) play_dead(); + local_irq_disable(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } @@ -206,104 +194,6 @@ void cpu_idle(void) } } -static void do_nothing(void *unused) -{ -} - -/* - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of - * pm_idle and update to new pm_idle value. Required while changing pm_idle - * handler on SMP systems. - * - * Caller must have changed pm_idle to the new value before the call. Old - * pm_idle value will not be used by any CPU after the return of this function. - */ -void cpu_idle_wait(void) -{ - smp_mb(); - /* kick all the CPUs so that they exit out of pm_idle */ - smp_call_function(do_nothing, NULL, 0, 1); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - * through MWAIT. Whenever someone changes need_resched, we would be woken - * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. - */ -void mwait_idle_with_hints(unsigned long ax, unsigned long cx) -{ - if (!need_resched()) { - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __sti_mwait(ax, cx); - else - local_irq_enable(); - } else - local_irq_enable(); -} - -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - local_irq_enable(); - mwait_idle_with_hints(0, 0); -} - -static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) -{ - if (force_mwait) - return 1; - /* Any C1 states supported? */ - return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; -} - -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) -{ - static int selected; - - if (selected) - return; -#ifdef CONFIG_X86_SMP - if (pm_idle == poll_idle && smp_num_siblings > 1) { - printk(KERN_WARNING "WARNING: polling idle and HT enabled," - " performance may degrade.\n"); - } -#endif - if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { - /* - * Skip, if setup has overridden idle. - * One CPU supports mwait => All CPUs supports mwait - */ - if (!pm_idle) { - printk(KERN_INFO "using mwait in idle threads.\n"); - pm_idle = mwait_idle; - } - } - selected = 1; -} - -static int __init idle_setup(char *str) -{ - if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); - pm_idle = poll_idle; - } else if (!strcmp(str, "mwait")) - force_mwait = 1; - else - return -1; - - boot_option_idle_override = 1; - return 0; -} -early_param("idle", idle_setup); - void __show_registers(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 131c2ee7ac5..e2319f39988 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -106,26 +106,13 @@ void default_idle(void) * test NEED_RESCHED: */ smp_mb(); - local_irq_disable(); - if (!need_resched()) { + if (!need_resched()) safe_halt(); /* enables interrupts racelessly */ - local_irq_disable(); - } - local_irq_enable(); + else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } -/* - * On SMP it's slightly faster (but much more power-consuming!) - * to poll the ->need_resched flag instead of waiting for the - * cross-CPU IPI to arrive. Use this option with caution. - */ -static void poll_idle(void) -{ - local_irq_enable(); - cpu_relax(); -} - #ifdef CONFIG_HOTPLUG_CPU DECLARE_PER_CPU(int, cpu_state); @@ -192,110 +179,6 @@ void cpu_idle(void) } } -static void do_nothing(void *unused) -{ -} - -/* - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of - * pm_idle and update to new pm_idle value. Required while changing pm_idle - * handler on SMP systems. - * - * Caller must have changed pm_idle to the new value before the call. Old - * pm_idle value will not be used by any CPU after the return of this function. - */ -void cpu_idle_wait(void) -{ - smp_mb(); - /* kick all the CPUs so that they exit out of pm_idle */ - smp_call_function(do_nothing, NULL, 0, 1); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - * through MWAIT. Whenever someone changes need_resched, we would be woken - * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. - */ -void mwait_idle_with_hints(unsigned long ax, unsigned long cx) -{ - if (!need_resched()) { - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __mwait(ax, cx); - } -} - -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - if (!need_resched()) { - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __sti_mwait(0, 0); - else - local_irq_enable(); - } else { - local_irq_enable(); - } -} - - -static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) -{ - if (force_mwait) - return 1; - /* Any C1 states supported? */ - return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; -} - -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) -{ - static int selected; - - if (selected) - return; -#ifdef CONFIG_X86_SMP - if (pm_idle == poll_idle && smp_num_siblings > 1) { - printk(KERN_WARNING "WARNING: polling idle and HT enabled," - " performance may degrade.\n"); - } -#endif - if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { - /* - * Skip, if setup has overridden idle. - * One CPU supports mwait => All CPUs supports mwait - */ - if (!pm_idle) { - printk(KERN_INFO "using mwait in idle threads.\n"); - pm_idle = mwait_idle; - } - } - selected = 1; -} - -static int __init idle_setup(char *str) -{ - if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); - pm_idle = poll_idle; - } else if (!strcmp(str, "mwait")) - force_mwait = 1; - else - return -1; - - boot_option_idle_override = 1; - return 0; -} -early_param("idle", idle_setup); - /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs * regs) { diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 559c1b02741..fb03ef380f0 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1207,97 +1207,16 @@ static int genregs32_set(struct task_struct *target, return ret; } -static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) { - siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); - compat_siginfo_t __user *si32 = compat_ptr(data); - siginfo_t ssi; - int ret; - - if (request == PTRACE_SETSIGINFO) { - memset(&ssi, 0, sizeof(siginfo_t)); - ret = copy_siginfo_from_user32(&ssi, si32); - if (ret) - return ret; - if (copy_to_user(si, &ssi, sizeof(siginfo_t))) - return -EFAULT; - } - ret = sys_ptrace(request, pid, addr, (unsigned long)si); - if (ret) - return ret; - if (request == PTRACE_GETSIGINFO) { - if (copy_from_user(&ssi, si, sizeof(siginfo_t))) - return -EFAULT; - ret = copy_siginfo_to_user32(si32, &ssi); - } - return ret; -} - -asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) -{ - struct task_struct *child; - struct pt_regs *childregs; + unsigned long addr = caddr; + unsigned long data = cdata; void __user *datap = compat_ptr(data); int ret; __u32 val; switch (request) { - case PTRACE_TRACEME: - case PTRACE_ATTACH: - case PTRACE_KILL: - case PTRACE_CONT: - case PTRACE_SINGLESTEP: - case PTRACE_SINGLEBLOCK: - case PTRACE_DETACH: - case PTRACE_SYSCALL: - case PTRACE_OLDSETOPTIONS: - case PTRACE_SETOPTIONS: - case PTRACE_SET_THREAD_AREA: - case PTRACE_GET_THREAD_AREA: -#ifdef X86_BTS - case PTRACE_BTS_CONFIG: - case PTRACE_BTS_STATUS: - case PTRACE_BTS_SIZE: - case PTRACE_BTS_GET: - case PTRACE_BTS_CLEAR: - case PTRACE_BTS_DRAIN: -#endif - return sys_ptrace(request, pid, addr, data); - - default: - return -EINVAL; - - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - case PTRACE_POKEDATA: - case PTRACE_POKETEXT: - case PTRACE_POKEUSR: - case PTRACE_PEEKUSR: - case PTRACE_GETREGS: - case PTRACE_SETREGS: - case PTRACE_SETFPREGS: - case PTRACE_GETFPREGS: - case PTRACE_SETFPXREGS: - case PTRACE_GETFPXREGS: - case PTRACE_GETEVENTMSG: - break; - - case PTRACE_SETSIGINFO: - case PTRACE_GETSIGINFO: - return ptrace32_siginfo(request, pid, addr, data); - } - - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) - return PTR_ERR(child); - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out; - - childregs = task_pt_regs(child); - - switch (request) { case PTRACE_PEEKUSR: ret = getreg32(child, addr, &val); if (ret == 0) @@ -1343,12 +1262,14 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) sizeof(struct user32_fxsr_struct), datap); + case PTRACE_GET_THREAD_AREA: + case PTRACE_SET_THREAD_AREA: + return arch_ptrace(child, request, addr, data); + default: return compat_ptrace_request(child, request, addr, data); } - out: - put_task_struct(child); return ret; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 19c9386ac11..a4a838306b2 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -8,6 +8,7 @@ #include <asm/apic.h> #include <asm/desc.h> #include <asm/hpet.h> +#include <asm/pgtable.h> #include <asm/reboot_fixups.h> #include <asm/reboot.h> @@ -15,7 +16,6 @@ # include <linux/dmi.h> # include <linux/ctype.h> # include <linux/mc146818rtc.h> -# include <asm/pgtable.h> #else # include <asm/iommu.h> #endif @@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length) /* Remap the kernel at virtual address zero, as well as offset zero from the kernel segment. This assumes the kernel segment starts at virtual address PAGE_OFFSET. */ - memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); /* @@ -399,7 +399,7 @@ static void native_machine_emergency_restart(void) } } -static void native_machine_shutdown(void) +void native_machine_shutdown(void) { /* Stop the cpus and apics */ #ifdef CONFIG_SMP @@ -470,7 +470,10 @@ struct machine_ops machine_ops = { .shutdown = native_machine_shutdown, .emergency_restart = native_machine_emergency_restart, .restart = native_machine_restart, - .halt = native_machine_halt + .halt = native_machine_halt, +#ifdef CONFIG_KEXEC + .crash_shutdown = native_machine_crash_shutdown, +#endif }; void machine_power_off(void) @@ -498,3 +501,9 @@ void machine_halt(void) machine_ops.halt(); } +#ifdef CONFIG_KEXEC +void machine_crash_shutdown(struct pt_regs *regs) +{ + machine_ops.crash_shutdown(regs); +} +#endif diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 455d3c80960..2283422af79 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -47,6 +47,7 @@ #include <linux/pfn.h> #include <linux/pci.h> #include <linux/init_ohci1394_dma.h> +#include <linux/kvm_para.h> #include <video/edid.h> @@ -389,7 +390,6 @@ unsigned long __init find_max_low_pfn(void) return max_low_pfn; } -#define BIOS_EBDA_SEGMENT 0x40E #define BIOS_LOWMEM_KILOBYTES 0x413 /* @@ -420,8 +420,7 @@ static void __init reserve_ebda_region(void) lowmem <<= 10; /* start of EBDA area */ - ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT); - ebda_addr <<= 4; + ebda_addr = get_bios_ebda(); /* Fixup: bios puts an EBDA in the top 64K segment */ /* of conventional memory, but does not adjust lowmem. */ @@ -822,6 +821,10 @@ void __init setup_arch(char **cmdline_p) max_low_pfn = setup_memory(); +#ifdef CONFIG_KVM_CLOCK + kvmclock_init(); +#endif + #ifdef CONFIG_VMI /* * Must be after max_low_pfn is determined, and before kernel @@ -829,6 +832,7 @@ void __init setup_arch(char **cmdline_p) */ vmi_init(); #endif + kvm_guest_init(); /* * NOTE: before this point _nobody_ is allowed to allocate diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index c2ec3dcb6b9..a94fb959a87 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -42,6 +42,7 @@ #include <linux/ctype.h> #include <linux/uaccess.h> #include <linux/init_ohci1394_dma.h> +#include <linux/kvm_para.h> #include <asm/mtrr.h> #include <asm/uaccess.h> @@ -116,7 +117,7 @@ extern int root_mountflags; char __initdata command_line[COMMAND_LINE_SIZE]; -struct resource standard_io_resources[] = { +static struct resource standard_io_resources[] = { { .name = "dma1", .start = 0x00, .end = 0x1f, .flags = IORESOURCE_BUSY | IORESOURCE_IO }, { .name = "pic1", .start = 0x20, .end = 0x21, @@ -190,6 +191,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); e820_register_active_regions(0, start_pfn, end_pfn); free_bootmem_with_active_regions(0, end_pfn); + early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); } #endif @@ -264,6 +266,28 @@ void __attribute__((weak)) __init memory_setup(void) machine_specific_memory_setup(); } +static void __init parse_setup_data(void) +{ + struct setup_data *data; + unsigned long pa_data; + + if (boot_params.hdr.version < 0x0209) + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, PAGE_SIZE); + switch (data->type) { + default: + break; + } +#ifndef CONFIG_DEBUG_BOOT_PARAMS + free_early(pa_data, pa_data+sizeof(*data)+data->len); +#endif + pa_data = data->next; + early_iounmap(data, PAGE_SIZE); + } +} + /* * setup_arch - architecture-specific boot-time initializations * @@ -316,6 +340,8 @@ void __init setup_arch(char **cmdline_p) strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; + parse_setup_data(); + parse_early_param(); #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT @@ -359,6 +385,10 @@ void __init setup_arch(char **cmdline_p) io_delay_init(); +#ifdef CONFIG_KVM_CLOCK + kvmclock_init(); +#endif + #ifdef CONFIG_SMP /* setup to use the early static init tables during kernel startup */ x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; @@ -397,8 +427,6 @@ void __init setup_arch(char **cmdline_p) contig_initmem_init(0, end_pfn); #endif - early_res_to_bootmem(); - dma32_reserve_bootmem(); #ifdef CONFIG_ACPI_SLEEP @@ -465,6 +493,8 @@ void __init setup_arch(char **cmdline_p) init_apic_mappings(); ioapic_init_mappings(); + kvm_guest_init(); + /* * We trust e820 completely. No explicit ROM probing in memory. */ diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index f1b11793083..8e05e7f7bd4 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -413,16 +413,6 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, regs->ss = __USER_DS; regs->cs = __USER_CS; - /* - * Clear TF when entering the signal handler, but - * notify any tracer that was single-stepping it. - * The tracer may want to single-step inside the - * handler too. - */ - regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - return 0; give_sigsegv: @@ -501,16 +491,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->ss = __USER_DS; regs->cs = __USER_CS; - /* - * Clear TF when entering the signal handler, but - * notify any tracer that was single-stepping it. - * The tracer may want to single-step inside the - * handler too. - */ - regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - return 0; give_sigsegv: @@ -566,6 +546,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, if (ret) return ret; + /* + * Clear the direction flag as per the ABI for function entry. + */ + regs->flags &= ~X86_EFLAGS_DF; + + /* + * Clear TF when entering the signal handler, but + * notify any tracer that was single-stepping it. + * The tracer may want to single-step inside the + * handler too. + */ + regs->flags &= ~X86_EFLAGS_TF; + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 827179c5b32..ccb2a4560c2 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -285,14 +285,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, even if the handler happens to be interrupting 32-bit code. */ regs->cs = __USER_CS; - /* This, by contrast, has nothing to do with segment registers - - see include/asm-x86_64/uaccess.h for details. */ - set_fs(USER_DS); - - regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - return 0; give_sigsegv: @@ -380,6 +372,28 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ret = setup_rt_frame(sig, ka, info, oldset, regs); if (ret == 0) { + /* + * This has nothing to do with segment registers, + * despite the name. This magic affects uaccess.h + * macros' behavior. Reset it to the normal setting. + */ + set_fs(USER_DS); + + /* + * Clear the direction flag as per the ABI for function entry. + */ + regs->flags &= ~X86_EFLAGS_DF; + + /* + * Clear TF when entering the signal handler, but + * notify any tracer that was single-stepping it. + * The tracer may want to single-step inside the + * handler too. + */ + regs->flags &= ~X86_EFLAGS_TF; + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ade371f9663..04c662ba18f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1039,8 +1039,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) #ifdef CONFIG_X86_32 /* init low mem mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); flush_tlb_all(); #endif @@ -1058,7 +1058,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) check_tsc_sync_source(cpu); local_irq_restore(flags); - while (!cpu_isset(cpu, cpu_online_map)) { + while (!cpu_online(cpu)) { cpu_relax(); touch_nmi_watchdog(); } @@ -1168,7 +1168,7 @@ static void __init smp_cpu_index_default(void) int i; struct cpuinfo_x86 *c; - for_each_cpu_mask(i, cpu_possible_map) { + for_each_possible_cpu(i) { c = &cpu_data(i); /* mark all to hotplug */ c->cpu_index = NR_CPUS; diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 6878a9c2df5..ae751094eba 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -29,6 +29,7 @@ #include <linux/mm.h> #include <linux/init.h> #include <asm/io.h> +#include <asm/bios_ebda.h> #include <asm/mach-summit/mach_mpparse.h> static struct rio_table_hdr *rio_table_hdr __initdata; @@ -140,8 +141,8 @@ void __init setup_summit(void) int i, next_wpeg, next_bus = 0; /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */ - ptr = *(unsigned short *)phys_to_virt(0x40Eul); - ptr = (unsigned long)phys_to_virt(ptr << 4); + ptr = get_bios_ebda(); + ptr = (unsigned long)phys_to_virt(ptr); rio_table_hdr = NULL; offset = 0x180; diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index df224a8774c..a1f07d79320 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -195,9 +195,9 @@ static int __cpuinit init_smp_flush(void) { int i; - for_each_cpu_mask(i, cpu_possible_map) { + for_each_possible_cpu(i) spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); - } + return 0; } core_initcall(init_smp_flush); diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index 64580679861..d8ccc3c6552 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S @@ -33,7 +33,7 @@ /* We can free up trampoline after bootup if cpu hotplug is not supported. */ #ifndef CONFIG_HOTPLUG_CPU -.section ".init.data","aw",@progbits +.section ".cpuinit.data","aw",@progbits #else .section .rodata,"a",@progbits #endif diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 471e694d671..bde6f63e15d 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -602,7 +602,7 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) -DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1) +DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) void __kprobes do_general_protection(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 12affe1f9bc..956f38927aa 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -320,7 +320,7 @@ static void check_zeroed_page(u32 pfn, int type, struct page *page) * pdes need to be zeroed. */ if (type & VMI_PAGE_CLONE) - limit = USER_PTRS_PER_PGD; + limit = KERNEL_PGD_BOUNDARY; for (i = 0; i < limit; i++) BUG_ON(ptr[i]); } @@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) } #endif -static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn) { vmi_set_page_type(pfn, VMI_PAGE_L1); vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); } -static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) { /* * This call comes in very early, before mem_map is setup. @@ -409,20 +409,20 @@ static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn) vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); } -static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) +static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) { vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); vmi_check_page_type(clonepfn, VMI_PAGE_L2); vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); } -static void vmi_release_pt(u32 pfn) +static void vmi_release_pte(u32 pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L1); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); } -static void vmi_release_pd(u32 pfn) +static void vmi_release_pmd(u32 pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L2); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); @@ -871,15 +871,15 @@ static inline int __init activate_vmi(void) vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); if (vmi_ops.allocate_page) { - pv_mmu_ops.alloc_pt = vmi_allocate_pt; - pv_mmu_ops.alloc_pd = vmi_allocate_pd; - pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone; + pv_mmu_ops.alloc_pte = vmi_allocate_pte; + pv_mmu_ops.alloc_pmd = vmi_allocate_pmd; + pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone; } vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); if (vmi_ops.release_page) { - pv_mmu_ops.release_pt = vmi_release_pt; - pv_mmu_ops.release_pd = vmi_release_pd; + pv_mmu_ops.release_pte = vmi_release_pte; + pv_mmu_ops.release_pmd = vmi_release_pmd; } /* Set linear is needed in all cases */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index b7ab3c335fa..fad3674b06a 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -209,12 +209,6 @@ SECTIONS EXIT_DATA } -/* vdso blob that is mapped into user space */ - vdso_start = . ; - .vdso : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) } - . = ALIGN(PAGE_SIZE); - vdso_end = .; - #ifdef CONFIG_BLK_DEV_INITRD . = ALIGN(PAGE_SIZE); __initramfs_start = .; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 41962e793c0..8d45fabc5f3 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -19,7 +19,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" - depends on HAVE_KVM && EXPERIMENTAL + depends on HAVE_KVM select PREEMPT_NOTIFIERS select ANON_INODES ---help--- @@ -50,6 +50,17 @@ config KVM_AMD Provides support for KVM on AMD processors equipped with the AMD-V (SVM) extensions. +config KVM_TRACE + bool "KVM trace support" + depends on KVM && MARKERS && SYSFS + select RELAY + select DEBUG_FS + default n + ---help--- + This option allows reading a trace of kvm-related events through + relayfs. Note the ABI is not considered stable and will be + modified in future updates. + # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. source drivers/lguest/Kconfig diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index ffdd0b31078..c97d35c218d 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -3,10 +3,14 @@ # common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) +ifeq ($(CONFIG_KVM_TRACE),y) +common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) +endif EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm -kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o +kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ + i8254.o obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c new file mode 100644 index 00000000000..361e3161127 --- /dev/null +++ b/arch/x86/kvm/i8254.c @@ -0,0 +1,611 @@ +/* + * 8253/8254 interval timer emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2006 Intel Corporation + * Copyright (c) 2007 Keir Fraser, XenSource Inc + * Copyright (c) 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Authors: + * Sheng Yang <sheng.yang@intel.com> + * Based on QEMU and Xen. + */ + +#include <linux/kvm_host.h> + +#include "irq.h" +#include "i8254.h" + +#ifndef CONFIG_X86_64 +#define mod_64(x, y) ((x) - (y) * div64_64(x, y)) +#else +#define mod_64(x, y) ((x) % (y)) +#endif + +#define RW_STATE_LSB 1 +#define RW_STATE_MSB 2 +#define RW_STATE_WORD0 3 +#define RW_STATE_WORD1 4 + +/* Compute with 96 bit intermediate result: (a*b)/c */ +static u64 muldiv64(u64 a, u32 b, u32 c) +{ + union { + u64 ll; + struct { + u32 low, high; + } l; + } u, res; + u64 rl, rh; + + u.ll = a; + rl = (u64)u.l.low * (u64)b; + rh = (u64)u.l.high * (u64)b; + rh += (rl >> 32); + res.l.high = div64_64(rh, c); + res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c); + return res.ll; +} + +static void pit_set_gate(struct kvm *kvm, int channel, u32 val) +{ + struct kvm_kpit_channel_state *c = + &kvm->arch.vpit->pit_state.channels[channel]; + + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + switch (c->mode) { + default: + case 0: + case 4: + /* XXX: just disable/enable counting */ + break; + case 1: + case 2: + case 3: + case 5: + /* Restart counting on rising edge. */ + if (c->gate < val) + c->count_load_time = ktime_get(); + break; + } + + c->gate = val; +} + +int pit_get_gate(struct kvm *kvm, int channel) +{ + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + return kvm->arch.vpit->pit_state.channels[channel].gate; +} + +static int pit_get_count(struct kvm *kvm, int channel) +{ + struct kvm_kpit_channel_state *c = + &kvm->arch.vpit->pit_state.channels[channel]; + s64 d, t; + int counter; + + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); + d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); + + switch (c->mode) { + case 0: + case 1: + case 4: + case 5: + counter = (c->count - d) & 0xffff; + break; + case 3: + /* XXX: may be incorrect for odd counts */ + counter = c->count - (mod_64((2 * d), c->count)); + break; + default: + counter = c->count - mod_64(d, c->count); + break; + } + return counter; +} + +static int pit_get_out(struct kvm *kvm, int channel) +{ + struct kvm_kpit_channel_state *c = + &kvm->arch.vpit->pit_state.channels[channel]; + s64 d, t; + int out; + + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); + d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); + + switch (c->mode) { + default: + case 0: + out = (d >= c->count); + break; + case 1: + out = (d < c->count); + break; + case 2: + out = ((mod_64(d, c->count) == 0) && (d != 0)); + break; + case 3: + out = (mod_64(d, c->count) < ((c->count + 1) >> 1)); + break; + case 4: + case 5: + out = (d == c->count); + break; + } + + return out; +} + +static void pit_latch_count(struct kvm *kvm, int channel) +{ + struct kvm_kpit_channel_state *c = + &kvm->arch.vpit->pit_state.channels[channel]; + + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + if (!c->count_latched) { + c->latched_count = pit_get_count(kvm, channel); + c->count_latched = c->rw_mode; + } +} + +static void pit_latch_status(struct kvm *kvm, int channel) +{ + struct kvm_kpit_channel_state *c = + &kvm->arch.vpit->pit_state.channels[channel]; + + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + if (!c->status_latched) { + /* TODO: Return NULL COUNT (bit 6). */ + c->status = ((pit_get_out(kvm, channel) << 7) | + (c->rw_mode << 4) | + (c->mode << 1) | + c->bcd); + c->status_latched = 1; + } +} + +int __pit_timer_fn(struct kvm_kpit_state *ps) +{ + struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0]; + struct kvm_kpit_timer *pt = &ps->pit_timer; + + atomic_inc(&pt->pending); + smp_mb__after_atomic_inc(); + /* FIXME: handle case where the guest is in guest mode */ + if (vcpu0 && waitqueue_active(&vcpu0->wq)) { + vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; + wake_up_interruptible(&vcpu0->wq); + } + + pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); + pt->scheduled = ktime_to_ns(pt->timer.expires); + + return (pt->period == 0 ? 0 : 1); +} + +int pit_has_pending_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_pit *pit = vcpu->kvm->arch.vpit; + + if (pit && vcpu->vcpu_id == 0) + return atomic_read(&pit->pit_state.pit_timer.pending); + + return 0; +} + +static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) +{ + struct kvm_kpit_state *ps; + int restart_timer = 0; + + ps = container_of(data, struct kvm_kpit_state, pit_timer.timer); + + restart_timer = __pit_timer_fn(ps); + + if (restart_timer) + return HRTIMER_RESTART; + else + return HRTIMER_NORESTART; +} + +static void destroy_pit_timer(struct kvm_kpit_timer *pt) +{ + pr_debug("pit: execute del timer!\n"); + hrtimer_cancel(&pt->timer); +} + +static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period) +{ + s64 interval; + + interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); + + pr_debug("pit: create pit timer, interval is %llu nsec\n", interval); + + /* TODO The new value only affected after the retriggered */ + hrtimer_cancel(&pt->timer); + pt->period = (is_period == 0) ? 0 : interval; + pt->timer.function = pit_timer_fn; + atomic_set(&pt->pending, 0); + + hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval), + HRTIMER_MODE_ABS); +} + +static void pit_load_count(struct kvm *kvm, int channel, u32 val) +{ + struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; + + WARN_ON(!mutex_is_locked(&ps->lock)); + + pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); + + /* + * Though spec said the state of 8254 is undefined after power-up, + * seems some tricky OS like Windows XP depends on IRQ0 interrupt + * when booting up. + * So here setting initialize rate for it, and not a specific number + */ + if (val == 0) + val = 0x10000; + + ps->channels[channel].count_load_time = ktime_get(); + ps->channels[channel].count = val; + + if (channel != 0) + return; + + /* Two types of timer + * mode 1 is one shot, mode 2 is period, otherwise del timer */ + switch (ps->channels[0].mode) { + case 1: + create_pit_timer(&ps->pit_timer, val, 0); + break; + case 2: + create_pit_timer(&ps->pit_timer, val, 1); + break; + default: + destroy_pit_timer(&ps->pit_timer); + } +} + +void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val) +{ + mutex_lock(&kvm->arch.vpit->pit_state.lock); + pit_load_count(kvm, channel, val); + mutex_unlock(&kvm->arch.vpit->pit_state.lock); +} + +static void pit_ioport_write(struct kvm_io_device *this, + gpa_t addr, int len, const void *data) +{ + struct kvm_pit *pit = (struct kvm_pit *)this->private; + struct kvm_kpit_state *pit_state = &pit->pit_state; + struct kvm *kvm = pit->kvm; + int channel, access; + struct kvm_kpit_channel_state *s; + u32 val = *(u32 *) data; + + val &= 0xff; + addr &= KVM_PIT_CHANNEL_MASK; + + mutex_lock(&pit_state->lock); + + if (val != 0) + pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n", + (unsigned int)addr, len, val); + + if (addr == 3) { + channel = val >> 6; + if (channel == 3) { + /* Read-Back Command. */ + for (channel = 0; channel < 3; channel++) { + s = &pit_state->channels[channel]; + if (val & (2 << channel)) { + if (!(val & 0x20)) + pit_latch_count(kvm, channel); + if (!(val & 0x10)) + pit_latch_status(kvm, channel); + } + } + } else { + /* Select Counter <channel>. */ + s = &pit_state->channels[channel]; + access = (val >> 4) & KVM_PIT_CHANNEL_MASK; + if (access == 0) { + pit_latch_count(kvm, channel); + } else { + s->rw_mode = access; + s->read_state = access; + s->write_state = access; + s->mode = (val >> 1) & 7; + if (s->mode > 5) + s->mode -= 4; + s->bcd = val & 1; + } + } + } else { + /* Write Count. */ + s = &pit_state->channels[addr]; + switch (s->write_state) { + default: + case RW_STATE_LSB: + pit_load_count(kvm, addr, val); + break; + case RW_STATE_MSB: + pit_load_count(kvm, addr, val << 8); + break; + case RW_STATE_WORD0: + s->write_latch = val; + s->write_state = RW_STATE_WORD1; + break; + case RW_STATE_WORD1: + pit_load_count(kvm, addr, s->write_latch | (val << 8)); + s->write_state = RW_STATE_WORD0; + break; + } + } + + mutex_unlock(&pit_state->lock); +} + +static void pit_ioport_read(struct kvm_io_device *this, + gpa_t addr, int len, void *data) +{ + struct kvm_pit *pit = (struct kvm_pit *)this->private; + struct kvm_kpit_state *pit_state = &pit->pit_state; + struct kvm *kvm = pit->kvm; + int ret, count; + struct kvm_kpit_channel_state *s; + + addr &= KVM_PIT_CHANNEL_MASK; + s = &pit_state->channels[addr]; + + mutex_lock(&pit_state->lock); + + if (s->status_latched) { + s->status_latched = 0; + ret = s->status; + } else if (s->count_latched) { + switch (s->count_latched) { + default: + case RW_STATE_LSB: + ret = s->latched_count & 0xff; + s->count_latched = 0; + break; + case RW_STATE_MSB: + ret = s->latched_count >> 8; + s->count_latched = 0; + break; + case RW_STATE_WORD0: + ret = s->latched_count & 0xff; + s->count_latched = RW_STATE_MSB; + break; + } + } else { + switch (s->read_state) { + default: + case RW_STATE_LSB: + count = pit_get_count(kvm, addr); + ret = count & 0xff; + break; + case RW_STATE_MSB: + count = pit_get_count(kvm, addr); + ret = (count >> 8) & 0xff; + break; + case RW_STATE_WORD0: + count = pit_get_count(kvm, addr); + ret = count & 0xff; + s->read_state = RW_STATE_WORD1; + break; + case RW_STATE_WORD1: + count = pit_get_count(kvm, addr); + ret = (count >> 8) & 0xff; + s->read_state = RW_STATE_WORD0; + break; + } + } + + if (len > sizeof(ret)) + len = sizeof(ret); + memcpy(data, (char *)&ret, len); + + mutex_unlock(&pit_state->lock); +} + +static int pit_in_range(struct kvm_io_device *this, gpa_t addr) +{ + return ((addr >= KVM_PIT_BASE_ADDRESS) && + (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); +} + +static void speaker_ioport_write(struct kvm_io_device *this, + gpa_t addr, int len, const void *data) +{ + struct kvm_pit *pit = (struct kvm_pit *)this->private; + struct kvm_kpit_state *pit_state = &pit->pit_state; + struct kvm *kvm = pit->kvm; + u32 val = *(u32 *) data; + + mutex_lock(&pit_state->lock); + pit_state->speaker_data_on = (val >> 1) & 1; + pit_set_gate(kvm, 2, val & 1); + mutex_unlock(&pit_state->lock); +} + +static void speaker_ioport_read(struct kvm_io_device *this, + gpa_t addr, int len, void *data) +{ + struct kvm_pit *pit = (struct kvm_pit *)this->private; + struct kvm_kpit_state *pit_state = &pit->pit_state; + struct kvm *kvm = pit->kvm; + unsigned int refresh_clock; + int ret; + + /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */ + refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1; + + mutex_lock(&pit_state->lock); + ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) | + (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4)); + if (len > sizeof(ret)) + len = sizeof(ret); + memcpy(data, (char *)&ret, len); + mutex_unlock(&pit_state->lock); +} + +static int speaker_in_range(struct kvm_io_device *this, gpa_t addr) +{ + return (addr == KVM_SPEAKER_BASE_ADDRESS); +} + +void kvm_pit_reset(struct kvm_pit *pit) +{ + int i; + struct kvm_kpit_channel_state *c; + + mutex_lock(&pit->pit_state.lock); + for (i = 0; i < 3; i++) { + c = &pit->pit_state.channels[i]; + c->mode = 0xff; + c->gate = (i != 2); + pit_load_count(pit->kvm, i, 0); + } + mutex_unlock(&pit->pit_state.lock); + + atomic_set(&pit->pit_state.pit_timer.pending, 0); + pit->pit_state.inject_pending = 1; +} + +struct kvm_pit *kvm_create_pit(struct kvm *kvm) +{ + struct kvm_pit *pit; + struct kvm_kpit_state *pit_state; + + pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL); + if (!pit) + return NULL; + + mutex_init(&pit->pit_state.lock); + mutex_lock(&pit->pit_state.lock); + + /* Initialize PIO device */ + pit->dev.read = pit_ioport_read; + pit->dev.write = pit_ioport_write; + pit->dev.in_range = pit_in_range; + pit->dev.private = pit; + kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev); + + pit->speaker_dev.read = speaker_ioport_read; + pit->speaker_dev.write = speaker_ioport_write; + pit->speaker_dev.in_range = speaker_in_range; + pit->speaker_dev.private = pit; + kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev); + + kvm->arch.vpit = pit; + pit->kvm = kvm; + + pit_state = &pit->pit_state; + pit_state->pit = pit; + hrtimer_init(&pit_state->pit_timer.timer, + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + mutex_unlock(&pit->pit_state.lock); + + kvm_pit_reset(pit); + + return pit; +} + +void kvm_free_pit(struct kvm *kvm) +{ + struct hrtimer *timer; + + if (kvm->arch.vpit) { + mutex_lock(&kvm->arch.vpit->pit_state.lock); + timer = &kvm->arch.vpit->pit_state.pit_timer.timer; + hrtimer_cancel(timer); + mutex_unlock(&kvm->arch.vpit->pit_state.lock); + kfree(kvm->arch.vpit); + } +} + +void __inject_pit_timer_intr(struct kvm *kvm) +{ + mutex_lock(&kvm->lock); + kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1); + kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0); + kvm_pic_set_irq(pic_irqchip(kvm), 0, 1); + kvm_pic_set_irq(pic_irqchip(kvm), 0, 0); + mutex_unlock(&kvm->lock); +} + +void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) +{ + struct kvm_pit *pit = vcpu->kvm->arch.vpit; + struct kvm *kvm = vcpu->kvm; + struct kvm_kpit_state *ps; + + if (vcpu && pit) { + ps = &pit->pit_state; + + /* Try to inject pending interrupts when: + * 1. Pending exists + * 2. Last interrupt was accepted or waited for too long time*/ + if (atomic_read(&ps->pit_timer.pending) && + (ps->inject_pending || + (jiffies - ps->last_injected_time + >= KVM_MAX_PIT_INTR_INTERVAL))) { + ps->inject_pending = 0; + __inject_pit_timer_intr(kvm); + ps->last_injected_time = jiffies; + } + } +} + +void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec) +{ + struct kvm_arch *arch = &vcpu->kvm->arch; + struct kvm_kpit_state *ps; + + if (vcpu && arch->vpit) { + ps = &arch->vpit->pit_state; + if (atomic_read(&ps->pit_timer.pending) && + (((arch->vpic->pics[0].imr & 1) == 0 && + arch->vpic->pics[0].irq_base == vec) || + (arch->vioapic->redirtbl[0].fields.vector == vec && + arch->vioapic->redirtbl[0].fields.mask != 1))) { + ps->inject_pending = 1; + atomic_dec(&ps->pit_timer.pending); + ps->channels[0].count_load_time = ktime_get(); + } + } +} diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h new file mode 100644 index 00000000000..db25c2a6c8c --- /dev/null +++ b/arch/x86/kvm/i8254.h @@ -0,0 +1,63 @@ +#ifndef __I8254_H +#define __I8254_H + +#include "iodev.h" + +struct kvm_kpit_timer { + struct hrtimer timer; + int irq; + s64 period; /* unit: ns */ + s64 scheduled; + ktime_t last_update; + atomic_t pending; +}; + +struct kvm_kpit_channel_state { + u32 count; /* can be 65536 */ + u16 latched_count; + u8 count_latched; + u8 status_latched; + u8 status; + u8 read_state; + u8 write_state; + u8 write_latch; + u8 rw_mode; + u8 mode; + u8 bcd; /* not supported */ + u8 gate; /* timer start */ + ktime_t count_load_time; +}; + +struct kvm_kpit_state { + struct kvm_kpit_channel_state channels[3]; + struct kvm_kpit_timer pit_timer; + u32 speaker_data_on; + struct mutex lock; + struct kvm_pit *pit; + bool inject_pending; /* if inject pending interrupts */ + unsigned long last_injected_time; +}; + +struct kvm_pit { + unsigned long base_addresss; + struct kvm_io_device dev; + struct kvm_io_device speaker_dev; + struct kvm *kvm; + struct kvm_kpit_state pit_state; +}; + +#define KVM_PIT_BASE_ADDRESS 0x40 +#define KVM_SPEAKER_BASE_ADDRESS 0x61 +#define KVM_PIT_MEM_LENGTH 4 +#define KVM_PIT_FREQ 1193181 +#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 +#define KVM_PIT_CHANNEL_MASK 0x3 + +void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); +void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec); +void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val); +struct kvm_pit *kvm_create_pit(struct kvm *kvm); +void kvm_free_pit(struct kvm *kvm); +void kvm_pit_reset(struct kvm_pit *pit); + +#endif diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index e5714759e97..ce1f583459b 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -23,6 +23,22 @@ #include <linux/kvm_host.h> #include "irq.h" +#include "i8254.h" + +/* + * check if there are pending timer events + * to be processed. + */ +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + int ret; + + ret = pit_has_pending_timer(vcpu); + ret |= apic_has_pending_timer(vcpu); + + return ret; +} +EXPORT_SYMBOL(kvm_cpu_has_pending_timer); /* * check if there is pending interrupt without @@ -66,6 +82,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) { kvm_inject_apic_timer_irqs(vcpu); + kvm_inject_pit_timer_irqs(vcpu); /* TODO: PIT, RTC etc. */ } EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); @@ -73,6 +90,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) { kvm_apic_timer_intr_post(vcpu, vec); + kvm_pit_timer_intr_post(vcpu, vec); /* TODO: PIT, RTC etc. */ } EXPORT_SYMBOL_GPL(kvm_timer_intr_post); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index fa5ed5d59b5..1802134b836 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -85,4 +85,7 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); +int pit_has_pending_timer(struct kvm_vcpu *vcpu); +int apic_has_pending_timer(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index ecdfe97e463..65ef0fc2c03 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h @@ -39,6 +39,8 @@ struct vcpu_svm { unsigned long host_db_regs[NUM_DB_REGS]; unsigned long host_dr6; unsigned long host_dr7; + + u32 *msrpm; }; #endif diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 68a6b151193..57ac4e4c556 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -338,10 +338,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, } else apic_clear_vector(vector, apic->regs + APIC_TMR); - if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE) + if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) kvm_vcpu_kick(vcpu); - else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) { - vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; + else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); } @@ -362,11 +362,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (level) { - if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE) + if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) printk(KERN_DEBUG "INIT on a runnable vcpu %d\n", vcpu->vcpu_id); - vcpu->arch.mp_state = VCPU_MP_STATE_INIT_RECEIVED; + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; kvm_vcpu_kick(vcpu); } else { printk(KERN_DEBUG @@ -379,9 +379,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n", vcpu->vcpu_id, vector); - if (vcpu->arch.mp_state == VCPU_MP_STATE_INIT_RECEIVED) { + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { vcpu->arch.sipi_vector = vector; - vcpu->arch.mp_state = VCPU_MP_STATE_SIPI_RECEIVED; + vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); } @@ -658,7 +658,7 @@ static void start_apic_timer(struct kvm_lapic *apic) apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" PRIx64 ", " "timer initial count 0x%x, period %lldns, " - "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__, + "expire @ 0x%016" PRIx64 ".\n", __func__, APIC_BUS_CYCLE_NS, ktime_to_ns(now), apic_get_reg(apic, APIC_TMICT), apic->timer.period, @@ -691,7 +691,7 @@ static void apic_mmio_write(struct kvm_io_device *this, /* too common printing */ if (offset != APIC_EOI) apic_debug("%s: offset 0x%x with length 0x%x, and value is " - "0x%x\n", __FUNCTION__, offset, len, val); + "0x%x\n", __func__, offset, len, val); offset &= 0xff0; @@ -822,6 +822,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) apic_set_tpr(apic, ((cr8 & 0x0f) << 4) | (apic_get_reg(apic, APIC_TASKPRI) & 4)); } +EXPORT_SYMBOL_GPL(kvm_lapic_set_tpr); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) { @@ -869,7 +870,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) struct kvm_lapic *apic; int i; - apic_debug("%s\n", __FUNCTION__); + apic_debug("%s\n", __func__); ASSERT(vcpu); apic = vcpu->arch.apic; @@ -907,7 +908,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) apic_update_ppr(apic); apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" - "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__, + "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, vcpu, kvm_apic_id(apic), vcpu->arch.apic_base, apic->base_address); } @@ -940,7 +941,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic) atomic_inc(&apic->timer.pending); if (waitqueue_active(q)) { - apic->vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; + apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; wake_up_interruptible(q); } if (apic_lvtt_period(apic)) { @@ -952,6 +953,16 @@ static int __apic_timer_fn(struct kvm_lapic *apic) return result; } +int apic_has_pending_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *lapic = vcpu->arch.apic; + + if (lapic) + return atomic_read(&lapic->timer.pending); + + return 0; +} + static int __inject_apic_timer_irq(struct kvm_lapic *apic) { int vector; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e55af12e11b..2ad6f548167 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -27,11 +27,22 @@ #include <linux/highmem.h> #include <linux/module.h> #include <linux/swap.h> +#include <linux/hugetlb.h> +#include <linux/compiler.h> #include <asm/page.h> #include <asm/cmpxchg.h> #include <asm/io.h> +/* + * When setting this variable to true it enables Two-Dimensional-Paging + * where the hardware walks 2 page tables: + * 1. the guest-virtual to guest-physical + * 2. while doing 1. it walks guest-physical to host-physical + * If the hardware supports that we don't need to do shadow paging. + */ +bool tdp_enabled = false; + #undef MMU_DEBUG #undef AUDIT @@ -101,8 +112,6 @@ static int dbg = 1; #define PT_FIRST_AVAIL_BITS_SHIFT 9 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 -#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) - #define VALID_PAGE(x) ((x) != INVALID_PAGE) #define PT64_LEVEL_BITS 9 @@ -159,6 +168,13 @@ static int dbg = 1; #define ACC_USER_MASK PT_USER_MASK #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) +struct kvm_pv_mmu_op_buffer { + void *ptr; + unsigned len; + unsigned processed; + char buf[512] __aligned(sizeof(long)); +}; + struct kvm_rmap_desc { u64 *shadow_ptes[RMAP_EXT]; struct kvm_rmap_desc *more; @@ -200,11 +216,15 @@ static int is_present_pte(unsigned long pte) static int is_shadow_present_pte(u64 pte) { - pte &= ~PT_SHADOW_IO_MARK; return pte != shadow_trap_nonpresent_pte && pte != shadow_notrap_nonpresent_pte; } +static int is_large_pte(u64 pte) +{ + return pte & PT_PAGE_SIZE_MASK; +} + static int is_writeble_pte(unsigned long pte) { return pte & PT_WRITABLE_MASK; @@ -215,14 +235,14 @@ static int is_dirty_pte(unsigned long pte) return pte & PT_DIRTY_MASK; } -static int is_io_pte(unsigned long pte) +static int is_rmap_pte(u64 pte) { - return pte & PT_SHADOW_IO_MARK; + return is_shadow_present_pte(pte); } -static int is_rmap_pte(u64 pte) +static pfn_t spte_to_pfn(u64 pte) { - return is_shadow_present_pte(pte); + return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; } static gfn_t pse36_gfn_delta(u32 gpte) @@ -349,16 +369,100 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) } /* + * Return the pointer to the largepage write count for a given + * gfn, handling slots that are not large page aligned. + */ +static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot) +{ + unsigned long idx; + + idx = (gfn / KVM_PAGES_PER_HPAGE) - + (slot->base_gfn / KVM_PAGES_PER_HPAGE); + return &slot->lpage_info[idx].write_count; +} + +static void account_shadowed(struct kvm *kvm, gfn_t gfn) +{ + int *write_count; + + write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); + *write_count += 1; + WARN_ON(*write_count > KVM_PAGES_PER_HPAGE); +} + +static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) +{ + int *write_count; + + write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); + *write_count -= 1; + WARN_ON(*write_count < 0); +} + +static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) +{ + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + int *largepage_idx; + + if (slot) { + largepage_idx = slot_largepage_idx(gfn, slot); + return *largepage_idx; + } + + return 1; +} + +static int host_largepage_backed(struct kvm *kvm, gfn_t gfn) +{ + struct vm_area_struct *vma; + unsigned long addr; + + addr = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(addr)) + return 0; + + vma = find_vma(current->mm, addr); + if (vma && is_vm_hugetlb_page(vma)) + return 1; + + return 0; +} + +static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn) +{ + struct kvm_memory_slot *slot; + + if (has_wrprotected_page(vcpu->kvm, large_gfn)) + return 0; + + if (!host_largepage_backed(vcpu->kvm, large_gfn)) + return 0; + + slot = gfn_to_memslot(vcpu->kvm, large_gfn); + if (slot && slot->dirty_bitmap) + return 0; + + return 1; +} + +/* * Take gfn and return the reverse mapping to it. * Note: gfn must be unaliased before this function get called */ -static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn) +static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage) { struct kvm_memory_slot *slot; + unsigned long idx; slot = gfn_to_memslot(kvm, gfn); - return &slot->rmap[gfn - slot->base_gfn]; + if (!lpage) + return &slot->rmap[gfn - slot->base_gfn]; + + idx = (gfn / KVM_PAGES_PER_HPAGE) - + (slot->base_gfn / KVM_PAGES_PER_HPAGE); + + return &slot->lpage_info[idx].rmap_pde; } /* @@ -370,7 +474,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn) * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc * containing more mappings. */ -static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) +static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) { struct kvm_mmu_page *sp; struct kvm_rmap_desc *desc; @@ -382,7 +486,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) gfn = unalias_gfn(vcpu->kvm, gfn); sp = page_header(__pa(spte)); sp->gfns[spte - sp->spt] = gfn; - rmapp = gfn_to_rmap(vcpu->kvm, gfn); + rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage); if (!*rmapp) { rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); *rmapp = (unsigned long)spte; @@ -435,20 +539,21 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) struct kvm_rmap_desc *desc; struct kvm_rmap_desc *prev_desc; struct kvm_mmu_page *sp; - struct page *page; + pfn_t pfn; unsigned long *rmapp; int i; if (!is_rmap_pte(*spte)) return; sp = page_header(__pa(spte)); - page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); - mark_page_accessed(page); + pfn = spte_to_pfn(*spte); + if (*spte & PT_ACCESSED_MASK) + kvm_set_pfn_accessed(pfn); if (is_writeble_pte(*spte)) - kvm_release_page_dirty(page); + kvm_release_pfn_dirty(pfn); else - kvm_release_page_clean(page); - rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]); + kvm_release_pfn_clean(pfn); + rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte)); if (!*rmapp) { printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); BUG(); @@ -514,7 +619,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) int write_protected = 0; gfn = unalias_gfn(kvm, gfn); - rmapp = gfn_to_rmap(kvm, gfn); + rmapp = gfn_to_rmap(kvm, gfn, 0); spte = rmap_next(kvm, rmapp, NULL); while (spte) { @@ -527,8 +632,35 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) } spte = rmap_next(kvm, rmapp, spte); } + if (write_protected) { + pfn_t pfn; + + spte = rmap_next(kvm, rmapp, NULL); + pfn = spte_to_pfn(*spte); + kvm_set_pfn_dirty(pfn); + } + + /* check for huge page mappings */ + rmapp = gfn_to_rmap(kvm, gfn, 1); + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + BUG_ON(!spte); + BUG_ON(!(*spte & PT_PRESENT_MASK)); + BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); + pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); + if (is_writeble_pte(*spte)) { + rmap_remove(kvm, spte); + --kvm->stat.lpages; + set_shadow_pte(spte, shadow_trap_nonpresent_pte); + write_protected = 1; + } + spte = rmap_next(kvm, rmapp, spte); + } + if (write_protected) kvm_flush_remote_tlbs(kvm); + + account_shadowed(kvm, gfn); } #ifdef MMU_DEBUG @@ -538,8 +670,8 @@ static int is_empty_shadow_page(u64 *spt) u64 *end; for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) - if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) { - printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__, + if (*pos != shadow_trap_nonpresent_pte) { + printk(KERN_ERR "%s: %p %llx\n", __func__, pos, *pos); return 0; } @@ -559,7 +691,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) static unsigned kvm_page_table_hashfn(gfn_t gfn) { - return gfn; + return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1); } static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, @@ -662,13 +794,14 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) struct kvm_mmu_page *sp; struct hlist_node *node; - pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); - index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; + pgprintk("%s: looking for gfn %lx\n", __func__, gfn); + index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; hlist_for_each_entry(sp, node, bucket, hash_link) - if (sp->gfn == gfn && !sp->role.metaphysical) { + if (sp->gfn == gfn && !sp->role.metaphysical + && !sp->role.invalid) { pgprintk("%s: found role %x\n", - __FUNCTION__, sp->role.word); + __func__, sp->role.word); return sp; } return NULL; @@ -699,27 +832,27 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; role.quadrant = quadrant; } - pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__, + pgprintk("%s: looking gfn %lx role %x\n", __func__, gfn, role.word); - index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; + index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; hlist_for_each_entry(sp, node, bucket, hash_link) if (sp->gfn == gfn && sp->role.word == role.word) { mmu_page_add_parent_pte(vcpu, sp, parent_pte); - pgprintk("%s: found\n", __FUNCTION__); + pgprintk("%s: found\n", __func__); return sp; } ++vcpu->kvm->stat.mmu_cache_miss; sp = kvm_mmu_alloc_page(vcpu, parent_pte); if (!sp) return sp; - pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word); + pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); sp->gfn = gfn; sp->role = role; hlist_add_head(&sp->hash_link, bucket); - vcpu->arch.mmu.prefetch_page(vcpu, sp); if (!metaphysical) rmap_write_protect(vcpu->kvm, gfn); + vcpu->arch.mmu.prefetch_page(vcpu, sp); return sp; } @@ -745,11 +878,17 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { ent = pt[i]; + if (is_shadow_present_pte(ent)) { + if (!is_large_pte(ent)) { + ent &= PT64_BASE_ADDR_MASK; + mmu_page_remove_parent_pte(page_header(ent), + &pt[i]); + } else { + --kvm->stat.lpages; + rmap_remove(kvm, &pt[i]); + } + } pt[i] = shadow_trap_nonpresent_pte; - if (!is_shadow_present_pte(ent)) - continue; - ent &= PT64_BASE_ADDR_MASK; - mmu_page_remove_parent_pte(page_header(ent), &pt[i]); } kvm_flush_remote_tlbs(kvm); } @@ -789,10 +928,15 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) } kvm_mmu_page_unlink_children(kvm, sp); if (!sp->root_count) { + if (!sp->role.metaphysical) + unaccount_shadowed(kvm, sp->gfn); hlist_del(&sp->hash_link); kvm_mmu_free_page(kvm, sp); - } else + } else { list_move(&sp->link, &kvm->arch.active_mmu_pages); + sp->role.invalid = 1; + kvm_reload_remote_mmus(kvm); + } kvm_mmu_reset_last_pte_updated(kvm); } @@ -838,13 +982,13 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) struct hlist_node *node, *n; int r; - pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); + pgprintk("%s: looking for gfn %lx\n", __func__, gfn); r = 0; - index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; + index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) if (sp->gfn == gfn && !sp->role.metaphysical) { - pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn, + pgprintk("%s: gfn %lx role %x\n", __func__, gfn, sp->role.word); kvm_mmu_zap_page(kvm, sp); r = 1; @@ -857,7 +1001,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) struct kvm_mmu_page *sp; while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) { - pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, sp->role.word); + pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); kvm_mmu_zap_page(kvm, sp); } } @@ -889,26 +1033,39 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, - int *ptwrite, gfn_t gfn, struct page *page) + int *ptwrite, int largepage, gfn_t gfn, + pfn_t pfn, bool speculative) { u64 spte; int was_rmapped = 0; int was_writeble = is_writeble_pte(*shadow_pte); - hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; pgprintk("%s: spte %llx access %x write_fault %d" " user_fault %d gfn %lx\n", - __FUNCTION__, *shadow_pte, pt_access, + __func__, *shadow_pte, pt_access, write_fault, user_fault, gfn); if (is_rmap_pte(*shadow_pte)) { - if (host_pfn != page_to_pfn(page)) { + /* + * If we overwrite a PTE page pointer with a 2MB PMD, unlink + * the parent of the now unreachable PTE. + */ + if (largepage && !is_large_pte(*shadow_pte)) { + struct kvm_mmu_page *child; + u64 pte = *shadow_pte; + + child = page_header(pte & PT64_BASE_ADDR_MASK); + mmu_page_remove_parent_pte(child, shadow_pte); + } else if (pfn != spte_to_pfn(*shadow_pte)) { pgprintk("hfn old %lx new %lx\n", - host_pfn, page_to_pfn(page)); + spte_to_pfn(*shadow_pte), pfn); rmap_remove(vcpu->kvm, shadow_pte); + } else { + if (largepage) + was_rmapped = is_large_pte(*shadow_pte); + else + was_rmapped = 1; } - else - was_rmapped = 1; } /* @@ -917,6 +1074,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, * demand paging). */ spte = PT_PRESENT_MASK | PT_DIRTY_MASK; + if (!speculative) + pte_access |= PT_ACCESSED_MASK; if (!dirty) pte_access &= ~ACC_WRITE_MASK; if (!(pte_access & ACC_EXEC_MASK)) @@ -925,15 +1084,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= PT_PRESENT_MASK; if (pte_access & ACC_USER_MASK) spte |= PT_USER_MASK; + if (largepage) + spte |= PT_PAGE_SIZE_MASK; - if (is_error_page(page)) { - set_shadow_pte(shadow_pte, - shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK); - kvm_release_page_clean(page); - return; - } - - spte |= page_to_phys(page); + spte |= (u64)pfn << PAGE_SHIFT; if ((pte_access & ACC_WRITE_MASK) || (write_fault && !is_write_protection(vcpu) && !user_fault)) { @@ -946,9 +1100,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, } shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); - if (shadow) { + if (shadow || + (largepage && has_wrprotected_page(vcpu->kvm, gfn))) { pgprintk("%s: found shadow page for %lx, marking ro\n", - __FUNCTION__, gfn); + __func__, gfn); pte_access &= ~ACC_WRITE_MASK; if (is_writeble_pte(spte)) { spte &= ~PT_WRITABLE_MASK; @@ -964,18 +1119,25 @@ unshadowed: if (pte_access & ACC_WRITE_MASK) mark_page_dirty(vcpu->kvm, gfn); - pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte); + pgprintk("%s: setting spte %llx\n", __func__, spte); + pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n", + (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB", + (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte); set_shadow_pte(shadow_pte, spte); + if (!was_rmapped && (spte & PT_PAGE_SIZE_MASK) + && (spte & PT_PRESENT_MASK)) + ++vcpu->kvm->stat.lpages; + page_header_update_slot(vcpu->kvm, shadow_pte, gfn); if (!was_rmapped) { - rmap_add(vcpu, shadow_pte, gfn); + rmap_add(vcpu, shadow_pte, gfn, largepage); if (!is_rmap_pte(*shadow_pte)) - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); } else { if (was_writeble) - kvm_release_page_dirty(page); + kvm_release_pfn_dirty(pfn); else - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); } if (!ptwrite || !*ptwrite) vcpu->arch.last_pte_updated = shadow_pte; @@ -985,10 +1147,10 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) { } -static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, - gfn_t gfn, struct page *page) +static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, + int largepage, gfn_t gfn, pfn_t pfn, + int level) { - int level = PT32E_ROOT_LEVEL; hpa_t table_addr = vcpu->arch.mmu.root_hpa; int pt_write = 0; @@ -1001,8 +1163,14 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, if (level == 1) { mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, - 0, write, 1, &pt_write, gfn, page); - return pt_write || is_io_pte(table[index]); + 0, write, 1, &pt_write, 0, gfn, pfn, false); + return pt_write; + } + + if (largepage && level == 2) { + mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, + 0, write, 1, &pt_write, 1, gfn, pfn, false); + return pt_write; } if (table[index] == shadow_trap_nonpresent_pte) { @@ -1016,7 +1184,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, 1, ACC_ALL, &table[index]); if (!new_table) { pgprintk("nonpaging_map: ENOMEM\n"); - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); return -ENOMEM; } @@ -1030,21 +1198,30 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) { int r; - - struct page *page; - - down_read(&vcpu->kvm->slots_lock); + int largepage = 0; + pfn_t pfn; down_read(¤t->mm->mmap_sem); - page = gfn_to_page(vcpu->kvm, gfn); + if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { + gfn &= ~(KVM_PAGES_PER_HPAGE-1); + largepage = 1; + } + + pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); + /* mmio */ + if (is_error_pfn(pfn)) { + kvm_release_pfn_clean(pfn); + return 1; + } + spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); - r = __nonpaging_map(vcpu, v, write, gfn, page); + r = __direct_map(vcpu, v, write, largepage, gfn, pfn, + PT32E_ROOT_LEVEL); spin_unlock(&vcpu->kvm->mmu_lock); - up_read(&vcpu->kvm->slots_lock); return r; } @@ -1073,6 +1250,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) sp = page_header(root); --sp->root_count; + if (!sp->root_count && sp->role.invalid) + kvm_mmu_zap_page(vcpu->kvm, sp); vcpu->arch.mmu.root_hpa = INVALID_PAGE; spin_unlock(&vcpu->kvm->mmu_lock); return; @@ -1085,6 +1264,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) root &= PT64_BASE_ADDR_MASK; sp = page_header(root); --sp->root_count; + if (!sp->root_count && sp->role.invalid) + kvm_mmu_zap_page(vcpu->kvm, sp); } vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; } @@ -1097,6 +1278,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) int i; gfn_t root_gfn; struct kvm_mmu_page *sp; + int metaphysical = 0; root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; @@ -1105,14 +1287,20 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) hpa_t root = vcpu->arch.mmu.root_hpa; ASSERT(!VALID_PAGE(root)); + if (tdp_enabled) + metaphysical = 1; sp = kvm_mmu_get_page(vcpu, root_gfn, 0, - PT64_ROOT_LEVEL, 0, ACC_ALL, NULL); + PT64_ROOT_LEVEL, metaphysical, + ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; vcpu->arch.mmu.root_hpa = root; return; } #endif + metaphysical = !is_paging(vcpu); + if (tdp_enabled) + metaphysical = 1; for (i = 0; i < 4; ++i) { hpa_t root = vcpu->arch.mmu.pae_root[i]; @@ -1126,7 +1314,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) } else if (vcpu->arch.mmu.root_level == 0) root_gfn = 0; sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, - PT32_ROOT_LEVEL, !is_paging(vcpu), + PT32_ROOT_LEVEL, metaphysical, ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; @@ -1146,7 +1334,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn; int r; - pgprintk("%s: gva %lx error %x\n", __FUNCTION__, gva, error_code); + pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); r = mmu_topup_memory_caches(vcpu); if (r) return r; @@ -1160,6 +1348,41 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, error_code & PFERR_WRITE_MASK, gfn); } +static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, + u32 error_code) +{ + pfn_t pfn; + int r; + int largepage = 0; + gfn_t gfn = gpa >> PAGE_SHIFT; + + ASSERT(vcpu); + ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); + + r = mmu_topup_memory_caches(vcpu); + if (r) + return r; + + down_read(¤t->mm->mmap_sem); + if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { + gfn &= ~(KVM_PAGES_PER_HPAGE-1); + largepage = 1; + } + pfn = gfn_to_pfn(vcpu->kvm, gfn); + up_read(¤t->mm->mmap_sem); + if (is_error_pfn(pfn)) { + kvm_release_pfn_clean(pfn); + return 1; + } + spin_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_free_some_pages(vcpu); + r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, + largepage, gfn, pfn, TDP_ROOT_LEVEL); + spin_unlock(&vcpu->kvm->mmu_lock); + + return r; +} + static void nonpaging_free(struct kvm_vcpu *vcpu) { mmu_free_roots(vcpu); @@ -1188,7 +1411,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) static void paging_new_cr3(struct kvm_vcpu *vcpu) { - pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3); + pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3); mmu_free_roots(vcpu); } @@ -1253,7 +1476,35 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu) return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); } -static int init_kvm_mmu(struct kvm_vcpu *vcpu) +static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu *context = &vcpu->arch.mmu; + + context->new_cr3 = nonpaging_new_cr3; + context->page_fault = tdp_page_fault; + context->free = nonpaging_free; + context->prefetch_page = nonpaging_prefetch_page; + context->shadow_root_level = TDP_ROOT_LEVEL; + context->root_hpa = INVALID_PAGE; + + if (!is_paging(vcpu)) { + context->gva_to_gpa = nonpaging_gva_to_gpa; + context->root_level = 0; + } else if (is_long_mode(vcpu)) { + context->gva_to_gpa = paging64_gva_to_gpa; + context->root_level = PT64_ROOT_LEVEL; + } else if (is_pae(vcpu)) { + context->gva_to_gpa = paging64_gva_to_gpa; + context->root_level = PT32E_ROOT_LEVEL; + } else { + context->gva_to_gpa = paging32_gva_to_gpa; + context->root_level = PT32_ROOT_LEVEL; + } + + return 0; +} + +static int init_kvm_softmmu(struct kvm_vcpu *vcpu) { ASSERT(vcpu); ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); @@ -1268,6 +1519,16 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu) return paging32_init_context(vcpu); } +static int init_kvm_mmu(struct kvm_vcpu *vcpu) +{ + vcpu->arch.update_pte.pfn = bad_pfn; + + if (tdp_enabled) + return init_kvm_tdp_mmu(vcpu); + else + return init_kvm_softmmu(vcpu); +} + static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) { ASSERT(vcpu); @@ -1316,7 +1577,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, pte = *spte; if (is_shadow_present_pte(pte)) { - if (sp->role.level == PT_PAGE_TABLE_LEVEL) + if (sp->role.level == PT_PAGE_TABLE_LEVEL || + is_large_pte(pte)) rmap_remove(vcpu->kvm, spte); else { child = page_header(pte & PT64_BASE_ADDR_MASK); @@ -1324,24 +1586,26 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, } } set_shadow_pte(spte, shadow_trap_nonpresent_pte); + if (is_large_pte(pte)) + --vcpu->kvm->stat.lpages; } static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, - const void *new, int bytes, - int offset_in_pte) + const void *new) { - if (sp->role.level != PT_PAGE_TABLE_LEVEL) { + if ((sp->role.level != PT_PAGE_TABLE_LEVEL) + && !vcpu->arch.update_pte.largepage) { ++vcpu->kvm->stat.mmu_pde_zapped; return; } ++vcpu->kvm->stat.mmu_pte_updated; if (sp->role.glevels == PT32_ROOT_LEVEL) - paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte); + paging32_update_pte(vcpu, sp, spte, new); else - paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte); + paging64_update_pte(vcpu, sp, spte, new); } static bool need_remote_flush(u64 old, u64 new) @@ -1378,7 +1642,9 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn_t gfn; int r; u64 gpte = 0; - struct page *page; + pfn_t pfn; + + vcpu->arch.update_pte.largepage = 0; if (bytes != 4 && bytes != 8) return; @@ -1408,11 +1674,19 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; down_read(¤t->mm->mmap_sem); - page = gfn_to_page(vcpu->kvm, gfn); + if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) { + gfn &= ~(KVM_PAGES_PER_HPAGE-1); + vcpu->arch.update_pte.largepage = 1; + } + pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); + if (is_error_pfn(pfn)) { + kvm_release_pfn_clean(pfn); + return; + } vcpu->arch.update_pte.gfn = gfn; - vcpu->arch.update_pte.page = page; + vcpu->arch.update_pte.pfn = pfn; } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, @@ -1423,7 +1697,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, struct hlist_node *node, *n; struct hlist_head *bucket; unsigned index; - u64 entry; + u64 entry, gentry; u64 *spte; unsigned offset = offset_in_page(gpa); unsigned pte_size; @@ -1433,8 +1707,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, int level; int flooded = 0; int npte; + int r; - pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); + pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); @@ -1450,7 +1725,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, vcpu->arch.last_pt_write_count = 1; vcpu->arch.last_pte_updated = NULL; } - index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; + index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { if (sp->gfn != gfn || sp->role.metaphysical) @@ -1496,20 +1771,29 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, continue; } spte = &sp->spt[page_offset / sizeof(*spte)]; + if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { + gentry = 0; + r = kvm_read_guest_atomic(vcpu->kvm, + gpa & ~(u64)(pte_size - 1), + &gentry, pte_size); + new = (const void *)&gentry; + if (r < 0) + new = NULL; + } while (npte--) { entry = *spte; mmu_pte_write_zap_pte(vcpu, sp, spte); - mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes, - page_offset & (pte_size - 1)); + if (new) + mmu_pte_write_new_pte(vcpu, sp, spte, new); mmu_pte_write_flush_tlb(vcpu, entry, *spte); ++spte; } } kvm_mmu_audit(vcpu, "post pte write"); spin_unlock(&vcpu->kvm->mmu_lock); - if (vcpu->arch.update_pte.page) { - kvm_release_page_clean(vcpu->arch.update_pte.page); - vcpu->arch.update_pte.page = NULL; + if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { + kvm_release_pfn_clean(vcpu->arch.update_pte.pfn); + vcpu->arch.update_pte.pfn = bad_pfn; } } @@ -1518,9 +1802,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) gpa_t gpa; int r; - down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); - up_read(&vcpu->kvm->slots_lock); spin_lock(&vcpu->kvm->mmu_lock); r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); @@ -1577,6 +1859,12 @@ out: } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); +void kvm_enable_tdp(void) +{ + tdp_enabled = true; +} +EXPORT_SYMBOL_GPL(kvm_enable_tdp); + static void free_mmu_pages(struct kvm_vcpu *vcpu) { struct kvm_mmu_page *sp; @@ -1677,7 +1965,53 @@ void kvm_mmu_zap_all(struct kvm *kvm) kvm_flush_remote_tlbs(kvm); } -void kvm_mmu_module_exit(void) +void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) +{ + struct kvm_mmu_page *page; + + page = container_of(kvm->arch.active_mmu_pages.prev, + struct kvm_mmu_page, link); + kvm_mmu_zap_page(kvm, page); +} + +static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) +{ + struct kvm *kvm; + struct kvm *kvm_freed = NULL; + int cache_count = 0; + + spin_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) { + int npages; + + spin_lock(&kvm->mmu_lock); + npages = kvm->arch.n_alloc_mmu_pages - + kvm->arch.n_free_mmu_pages; + cache_count += npages; + if (!kvm_freed && nr_to_scan > 0 && npages > 0) { + kvm_mmu_remove_one_alloc_mmu_page(kvm); + cache_count--; + kvm_freed = kvm; + } + nr_to_scan--; + + spin_unlock(&kvm->mmu_lock); + } + if (kvm_freed) + list_move_tail(&kvm_freed->vm_list, &vm_list); + + spin_unlock(&kvm_lock); + + return cache_count; +} + +static struct shrinker mmu_shrinker = { + .shrink = mmu_shrink, + .seeks = DEFAULT_SEEKS * 10, +}; + +void mmu_destroy_caches(void) { if (pte_chain_cache) kmem_cache_destroy(pte_chain_cache); @@ -1687,6 +2021,12 @@ void kvm_mmu_module_exit(void) kmem_cache_destroy(mmu_page_header_cache); } +void kvm_mmu_module_exit(void) +{ + mmu_destroy_caches(); + unregister_shrinker(&mmu_shrinker); +} + int kvm_mmu_module_init(void) { pte_chain_cache = kmem_cache_create("kvm_pte_chain", @@ -1706,10 +2046,12 @@ int kvm_mmu_module_init(void) if (!mmu_page_header_cache) goto nomem; + register_shrinker(&mmu_shrinker); + return 0; nomem: - kvm_mmu_module_exit(); + mmu_destroy_caches(); return -ENOMEM; } @@ -1732,6 +2074,127 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) return nr_mmu_pages; } +static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer, + unsigned len) +{ + if (len > buffer->len) + return NULL; + return buffer->ptr; +} + +static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer, + unsigned len) +{ + void *ret; + + ret = pv_mmu_peek_buffer(buffer, len); + if (!ret) + return ret; + buffer->ptr += len; + buffer->len -= len; + buffer->processed += len; + return ret; +} + +static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, + gpa_t addr, gpa_t value) +{ + int bytes = 8; + int r; + + if (!is_long_mode(vcpu) && !is_pae(vcpu)) + bytes = 4; + + r = mmu_topup_memory_caches(vcpu); + if (r) + return r; + + if (!emulator_write_phys(vcpu, addr, &value, bytes)) + return -EFAULT; + + return 1; +} + +static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops->tlb_flush(vcpu); + return 1; +} + +static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr) +{ + spin_lock(&vcpu->kvm->mmu_lock); + mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT); + spin_unlock(&vcpu->kvm->mmu_lock); + return 1; +} + +static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu, + struct kvm_pv_mmu_op_buffer *buffer) +{ + struct kvm_mmu_op_header *header; + + header = pv_mmu_peek_buffer(buffer, sizeof *header); + if (!header) + return 0; + switch (header->op) { + case KVM_MMU_OP_WRITE_PTE: { + struct kvm_mmu_op_write_pte *wpte; + + wpte = pv_mmu_read_buffer(buffer, sizeof *wpte); + if (!wpte) + return 0; + return kvm_pv_mmu_write(vcpu, wpte->pte_phys, + wpte->pte_val); + } + case KVM_MMU_OP_FLUSH_TLB: { + struct kvm_mmu_op_flush_tlb *ftlb; + + ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb); + if (!ftlb) + return 0; + return kvm_pv_mmu_flush_tlb(vcpu); + } + case KVM_MMU_OP_RELEASE_PT: { + struct kvm_mmu_op_release_pt *rpt; + + rpt = pv_mmu_read_buffer(buffer, sizeof *rpt); + if (!rpt) + return 0; + return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys); + } + default: return 0; + } +} + +int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, + gpa_t addr, unsigned long *ret) +{ + int r; + struct kvm_pv_mmu_op_buffer buffer; + + buffer.ptr = buffer.buf; + buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf); + buffer.processed = 0; + + r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len); + if (r) + goto out; + + while (buffer.len) { + r = kvm_pv_mmu_op_one(vcpu, &buffer); + if (r < 0) + goto out; + if (r == 0) + break; + } + + r = 1; +out: + *ret = buffer.processed; + return r; +} + #ifdef AUDIT static const char *audit_msg; @@ -1768,8 +2231,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, audit_mappings_page(vcpu, ent, va, level - 1); } else { gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); - struct page *page = gpa_to_page(vcpu, gpa); - hpa_t hpa = page_to_phys(page); + hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT; if (is_shadow_present_pte(ent) && (ent & PT64_BASE_ADDR_MASK) != hpa) @@ -1782,7 +2244,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, && !is_error_hpa(hpa)) printk(KERN_ERR "audit: (%s) notrap shadow," " valid guest gva %lx\n", audit_msg, va); - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); } } @@ -1867,7 +2329,7 @@ static void audit_rmap(struct kvm_vcpu *vcpu) if (n_rmap != n_actual) printk(KERN_ERR "%s: (%s) rmap %d actual %d\n", - __FUNCTION__, audit_msg, n_rmap, n_actual); + __func__, audit_msg, n_rmap, n_actual); } static void audit_write_protection(struct kvm_vcpu *vcpu) @@ -1887,7 +2349,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) if (*rmapp) printk(KERN_ERR "%s: (%s) shadow page has writable" " mappings: gfn %lx role %x\n", - __FUNCTION__, audit_msg, sp->gfn, + __func__, audit_msg, sp->gfn, sp->role.word); } } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 1fce19ec7a2..e64e9f56a65 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -3,6 +3,12 @@ #include <linux/kvm_host.h> +#ifdef CONFIG_X86_64 +#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL +#else +#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL +#endif + static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) { if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index ecc0856268c..156fe10288a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -130,7 +130,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, unsigned index, pt_access, pte_access; gpa_t pte_gpa; - pgprintk("%s: addr %lx\n", __FUNCTION__, addr); + pgprintk("%s: addr %lx\n", __func__, addr); walk: walker->level = vcpu->arch.mmu.root_level; pte = vcpu->arch.cr3; @@ -155,7 +155,7 @@ walk: pte_gpa += index * sizeof(pt_element_t); walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, + pgprintk("%s: table_gfn[%d] %lx\n", __func__, walker->level - 1, table_gfn); kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); @@ -222,7 +222,7 @@ walk: walker->pt_access = pt_access; walker->pte_access = pte_access; pgprintk("%s: pte %llx pte_access %x pt_access %x\n", - __FUNCTION__, (u64)pte, pt_access, pte_access); + __func__, (u64)pte, pt_access, pte_access); return 1; not_present: @@ -243,31 +243,30 @@ err: } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, - u64 *spte, const void *pte, int bytes, - int offset_in_pte) + u64 *spte, const void *pte) { pt_element_t gpte; unsigned pte_access; - struct page *npage; + pfn_t pfn; + int largepage = vcpu->arch.update_pte.largepage; gpte = *(const pt_element_t *)pte; if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { - if (!offset_in_pte && !is_present_pte(gpte)) + if (!is_present_pte(gpte)) set_shadow_pte(spte, shadow_notrap_nonpresent_pte); return; } - if (bytes < sizeof(pt_element_t)) - return; - pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); + pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) return; - npage = vcpu->arch.update_pte.page; - if (!npage) + pfn = vcpu->arch.update_pte.pfn; + if (is_error_pfn(pfn)) return; - get_page(npage); + kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, - gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), npage); + gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), + pfn, true); } /* @@ -275,8 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, */ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *walker, - int user_fault, int write_fault, int *ptwrite, - struct page *page) + int user_fault, int write_fault, int largepage, + int *ptwrite, pfn_t pfn) { hpa_t shadow_addr; int level; @@ -304,11 +303,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, shadow_ent = ((u64 *)__va(shadow_addr)) + index; if (level == PT_PAGE_TABLE_LEVEL) break; - if (is_shadow_present_pte(*shadow_ent)) { + + if (largepage && level == PT_DIRECTORY_LEVEL) + break; + + if (is_shadow_present_pte(*shadow_ent) + && !is_large_pte(*shadow_ent)) { shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; continue; } + if (is_large_pte(*shadow_ent)) + rmap_remove(vcpu->kvm, shadow_ent); + if (level - 1 == PT_PAGE_TABLE_LEVEL && walker->level == PT_DIRECTORY_LEVEL) { metaphysical = 1; @@ -329,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, walker->pte_gpa[level - 2], &curr_pte, sizeof(curr_pte)); if (r || curr_pte != walker->ptes[level - 2]) { - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); return NULL; } } @@ -342,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, user_fault, write_fault, walker->ptes[walker->level-1] & PT_DIRTY_MASK, - ptwrite, walker->gfn, page); + ptwrite, largepage, walker->gfn, pfn, false); return shadow_ent; } @@ -371,16 +378,16 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u64 *shadow_pte; int write_pt = 0; int r; - struct page *page; + pfn_t pfn; + int largepage = 0; - pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code); + pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); kvm_mmu_audit(vcpu, "pre page fault"); r = mmu_topup_memory_caches(vcpu); if (r) return r; - down_read(&vcpu->kvm->slots_lock); /* * Look up the shadow pte for the faulting address. */ @@ -391,40 +398,45 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, * The page is not mapped by the guest. Let the guest handle it. */ if (!r) { - pgprintk("%s: guest page fault\n", __FUNCTION__); + pgprintk("%s: guest page fault\n", __func__); inject_page_fault(vcpu, addr, walker.error_code); vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ - up_read(&vcpu->kvm->slots_lock); return 0; } down_read(¤t->mm->mmap_sem); - page = gfn_to_page(vcpu->kvm, walker.gfn); + if (walker.level == PT_DIRECTORY_LEVEL) { + gfn_t large_gfn; + large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1); + if (is_largepage_backed(vcpu, large_gfn)) { + walker.gfn = large_gfn; + largepage = 1; + } + } + pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); up_read(¤t->mm->mmap_sem); + /* mmio */ + if (is_error_pfn(pfn)) { + pgprintk("gfn %x is mmio\n", walker.gfn); + kvm_release_pfn_clean(pfn); + return 1; + } + spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, - &write_pt, page); - pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, + largepage, &write_pt, pfn); + + pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, shadow_pte, *shadow_pte, write_pt); if (!write_pt) vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ - /* - * mmio: emulate if accessible, otherwise its a guest fault. - */ - if (shadow_pte && is_io_pte(*shadow_pte)) { - spin_unlock(&vcpu->kvm->mmu_lock); - up_read(&vcpu->kvm->slots_lock); - return 1; - } - ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, "post page fault (fixed)"); spin_unlock(&vcpu->kvm->mmu_lock); - up_read(&vcpu->kvm->slots_lock); return write_pt; } diff --git a/arch/x86/kvm/segment_descriptor.h b/arch/x86/kvm/segment_descriptor.h deleted file mode 100644 index 56fc4c87338..00000000000 --- a/arch/x86/kvm/segment_descriptor.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef __SEGMENT_DESCRIPTOR_H -#define __SEGMENT_DESCRIPTOR_H - -struct segment_descriptor { - u16 limit_low; - u16 base_low; - u8 base_mid; - u8 type : 4; - u8 system : 1; - u8 dpl : 2; - u8 present : 1; - u8 limit_high : 4; - u8 avl : 1; - u8 long_mode : 1; - u8 default_op : 1; - u8 granularity : 1; - u8 base_high; -} __attribute__((packed)); - -#ifdef CONFIG_X86_64 -/* LDT or TSS descriptor in the GDT. 16 bytes. */ -struct segment_descriptor_64 { - struct segment_descriptor s; - u32 base_higher; - u32 pad_zero; -}; - -#endif -#endif diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1a582f1090e..89e0be2c10d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -47,6 +47,18 @@ MODULE_LICENSE("GPL"); #define SVM_FEATURE_LBRV (1 << 1) #define SVM_DEATURE_SVML (1 << 2) +#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) + +/* enable NPT for AMD64 and X86 with PAE */ +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +static bool npt_enabled = true; +#else +static bool npt_enabled = false; +#endif +static int npt = 1; + +module_param(npt, int, S_IRUGO); + static void kvm_reput_irq(struct vcpu_svm *svm); static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) @@ -54,8 +66,7 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) return container_of(vcpu, struct vcpu_svm, vcpu); } -unsigned long iopm_base; -unsigned long msrpm_base; +static unsigned long iopm_base; struct kvm_ldttss_desc { u16 limit0; @@ -182,7 +193,7 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { - if (!(efer & EFER_LMA)) + if (!npt_enabled && !(efer & EFER_LMA)) efer &= ~EFER_LME; to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; @@ -219,12 +230,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (!svm->next_rip) { - printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); + printk(KERN_DEBUG "%s: NOP\n", __func__); return; } if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", - __FUNCTION__, + __func__, svm->vmcb->save.rip, svm->next_rip); @@ -279,11 +290,7 @@ static void svm_hardware_enable(void *garbage) struct svm_cpu_data *svm_data; uint64_t efer; -#ifdef CONFIG_X86_64 - struct desc_ptr gdt_descr; -#else struct desc_ptr gdt_descr; -#endif struct desc_struct *gdt; int me = raw_smp_processor_id(); @@ -302,7 +309,6 @@ static void svm_hardware_enable(void *garbage) svm_data->asid_generation = 1; svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; svm_data->next_asid = svm_data->max_asid + 1; - svm_features = cpuid_edx(SVM_CPUID_FUNC); asm volatile ("sgdt %0" : "=m"(gdt_descr)); gdt = (struct desc_struct *)gdt_descr.address; @@ -361,12 +367,51 @@ static void set_msr_interception(u32 *msrpm, unsigned msr, BUG(); } +static void svm_vcpu_init_msrpm(u32 *msrpm) +{ + memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); + +#ifdef CONFIG_X86_64 + set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); + set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); + set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); + set_msr_interception(msrpm, MSR_LSTAR, 1, 1); + set_msr_interception(msrpm, MSR_CSTAR, 1, 1); + set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); +#endif + set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); + set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); + set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1); + set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1); +} + +static void svm_enable_lbrv(struct vcpu_svm *svm) +{ + u32 *msrpm = svm->msrpm; + + svm->vmcb->control.lbr_ctl = 1; + set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); + set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); + set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); + set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1); +} + +static void svm_disable_lbrv(struct vcpu_svm *svm) +{ + u32 *msrpm = svm->msrpm; + + svm->vmcb->control.lbr_ctl = 0; + set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0); + set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0); + set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0); + set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); +} + static __init int svm_hardware_setup(void) { int cpu; struct page *iopm_pages; - struct page *msrpm_pages; - void *iopm_va, *msrpm_va; + void *iopm_va; int r; iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); @@ -379,41 +424,33 @@ static __init int svm_hardware_setup(void) clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */ iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; + if (boot_cpu_has(X86_FEATURE_NX)) + kvm_enable_efer_bits(EFER_NX); - msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); + for_each_online_cpu(cpu) { + r = svm_cpu_init(cpu); + if (r) + goto err; + } - r = -ENOMEM; - if (!msrpm_pages) - goto err_1; + svm_features = cpuid_edx(SVM_CPUID_FUNC); - msrpm_va = page_address(msrpm_pages); - memset(msrpm_va, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); - msrpm_base = page_to_pfn(msrpm_pages) << PAGE_SHIFT; + if (!svm_has(SVM_FEATURE_NPT)) + npt_enabled = false; -#ifdef CONFIG_X86_64 - set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1); - set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1); - set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1); - set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1); - set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1); - set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1); -#endif - set_msr_interception(msrpm_va, MSR_K6_STAR, 1, 1); - set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1); - set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1); - set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1); + if (npt_enabled && !npt) { + printk(KERN_INFO "kvm: Nested Paging disabled\n"); + npt_enabled = false; + } - for_each_online_cpu(cpu) { - r = svm_cpu_init(cpu); - if (r) - goto err_2; + if (npt_enabled) { + printk(KERN_INFO "kvm: Nested Paging enabled\n"); + kvm_enable_tdp(); } + return 0; -err_2: - __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); - msrpm_base = 0; -err_1: +err: __free_pages(iopm_pages, IOPM_ALLOC_ORDER); iopm_base = 0; return r; @@ -421,9 +458,8 @@ err_1: static __exit void svm_hardware_unsetup(void) { - __free_pages(pfn_to_page(msrpm_base >> PAGE_SHIFT), MSRPM_ALLOC_ORDER); __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); - iopm_base = msrpm_base = 0; + iopm_base = 0; } static void init_seg(struct vmcb_seg *seg) @@ -443,15 +479,14 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } -static void init_vmcb(struct vmcb *vmcb) +static void init_vmcb(struct vcpu_svm *svm) { - struct vmcb_control_area *control = &vmcb->control; - struct vmcb_save_area *save = &vmcb->save; + struct vmcb_control_area *control = &svm->vmcb->control; + struct vmcb_save_area *save = &svm->vmcb->save; control->intercept_cr_read = INTERCEPT_CR0_MASK | INTERCEPT_CR3_MASK | - INTERCEPT_CR4_MASK | - INTERCEPT_CR8_MASK; + INTERCEPT_CR4_MASK; control->intercept_cr_write = INTERCEPT_CR0_MASK | INTERCEPT_CR3_MASK | @@ -471,23 +506,13 @@ static void init_vmcb(struct vmcb *vmcb) INTERCEPT_DR7_MASK; control->intercept_exceptions = (1 << PF_VECTOR) | - (1 << UD_VECTOR); + (1 << UD_VECTOR) | + (1 << MC_VECTOR); control->intercept = (1ULL << INTERCEPT_INTR) | (1ULL << INTERCEPT_NMI) | (1ULL << INTERCEPT_SMI) | - /* - * selective cr0 intercept bug? - * 0: 0f 22 d8 mov %eax,%cr3 - * 3: 0f 20 c0 mov %cr0,%eax - * 6: 0d 00 00 00 80 or $0x80000000,%eax - * b: 0f 22 c0 mov %eax,%cr0 - * set cr3 ->interception - * get cr0 ->interception - * set cr0 -> no interception - */ - /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */ (1ULL << INTERCEPT_CPUID) | (1ULL << INTERCEPT_INVD) | (1ULL << INTERCEPT_HLT) | @@ -508,7 +533,7 @@ static void init_vmcb(struct vmcb *vmcb) (1ULL << INTERCEPT_MWAIT); control->iopm_base_pa = iopm_base; - control->msrpm_base_pa = msrpm_base; + control->msrpm_base_pa = __pa(svm->msrpm); control->tsc_offset = 0; control->int_ctl = V_INTR_MASKING_MASK; @@ -550,13 +575,30 @@ static void init_vmcb(struct vmcb *vmcb) save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; save->cr4 = X86_CR4_PAE; /* rdx = ?? */ + + if (npt_enabled) { + /* Setup VMCB for Nested Paging */ + control->nested_ctl = 1; + control->intercept &= ~(1ULL << INTERCEPT_TASK_SWITCH); + control->intercept_exceptions &= ~(1 << PF_VECTOR); + control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| + INTERCEPT_CR3_MASK); + control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| + INTERCEPT_CR3_MASK); + save->g_pat = 0x0007040600070406ULL; + /* enable caching because the QEMU Bios doesn't enable it */ + save->cr0 = X86_CR0_ET; + save->cr3 = 0; + save->cr4 = 0; + } + force_new_asid(&svm->vcpu); } static int svm_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - init_vmcb(svm->vmcb); + init_vmcb(svm); if (vcpu->vcpu_id != 0) { svm->vmcb->save.rip = 0; @@ -571,6 +613,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) { struct vcpu_svm *svm; struct page *page; + struct page *msrpm_pages; int err; svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); @@ -589,12 +632,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto uninit; } + err = -ENOMEM; + msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); + if (!msrpm_pages) + goto uninit; + svm->msrpm = page_address(msrpm_pages); + svm_vcpu_init_msrpm(svm->msrpm); + svm->vmcb = page_address(page); clear_page(svm->vmcb); svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; svm->asid_generation = 0; memset(svm->db_regs, 0, sizeof(svm->db_regs)); - init_vmcb(svm->vmcb); + init_vmcb(svm); fx_init(&svm->vcpu); svm->vcpu.fpu_active = 1; @@ -617,6 +667,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); } @@ -731,6 +782,13 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->unusable = !var->present; } +static int svm_get_cpl(struct kvm_vcpu *vcpu) +{ + struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; + + return save->cpl; +} + static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { struct vcpu_svm *svm = to_svm(vcpu); @@ -784,6 +842,9 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } } #endif + if (npt_enabled) + goto set; + if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); vcpu->fpu_active = 1; @@ -791,18 +852,29 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vcpu->arch.cr0 = cr0; cr0 |= X86_CR0_PG | X86_CR0_WP; - cr0 &= ~(X86_CR0_CD | X86_CR0_NW); if (!vcpu->fpu_active) { svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); cr0 |= X86_CR0_TS; } +set: + /* + * re-enable caching here because the QEMU bios + * does not do it - this results in some delay at + * reboot + */ + cr0 &= ~(X86_CR0_CD | X86_CR0_NW); svm->vmcb->save.cr0 = cr0; } static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - vcpu->arch.cr4 = cr4; - to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE; + unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; + + vcpu->arch.cr4 = cr4; + if (!npt_enabled) + cr4 |= X86_CR4_PAE; + cr4 |= host_cr4_mce; + to_svm(vcpu)->vmcb->save.cr4 = cr4; } static void svm_set_segment(struct kvm_vcpu *vcpu, @@ -833,13 +905,6 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, } -/* FIXME: - - svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK; - svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK); - -*/ - static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) { return -EOPNOTSUPP; @@ -920,7 +985,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, } default: printk(KERN_DEBUG "%s: unexpected dr %u\n", - __FUNCTION__, dr); + __func__, dr); *exception = UD_VECTOR; return; } @@ -962,6 +1027,19 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } +static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + /* + * On an #MC intercept the MCE handler is not called automatically in + * the host. So do it by hand here. + */ + asm volatile ( + "int $0x12\n"); + /* not sure if we ever come back to this point */ + + return 1; +} + static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { /* @@ -969,7 +1047,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) * so reinitialize it. */ clear_page(svm->vmcb); - init_vmcb(svm->vmcb); + init_vmcb(svm); kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; return 0; @@ -1033,9 +1111,18 @@ static int invalid_op_interception(struct vcpu_svm *svm, static int task_switch_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __FUNCTION__); - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - return 0; + u16 tss_selector; + + tss_selector = (u16)svm->vmcb->control.exit_info_1; + if (svm->vmcb->control.exit_info_2 & + (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) + return kvm_task_switch(&svm->vcpu, tss_selector, + TASK_SWITCH_IRET); + if (svm->vmcb->control.exit_info_2 & + (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) + return kvm_task_switch(&svm->vcpu, tss_selector, + TASK_SWITCH_JMP); + return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL); } static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) @@ -1049,7 +1136,7 @@ static int emulate_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) - pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__); + pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); return 1; } @@ -1179,8 +1266,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) svm->vmcb->save.sysenter_esp = data; break; case MSR_IA32_DEBUGCTLMSR: - pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", - __FUNCTION__, data); + if (!svm_has(SVM_FEATURE_LBRV)) { + pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", + __func__, data); + break; + } + if (data & DEBUGCTL_RESERVED_BITS) + return 1; + + svm->vmcb->save.dbgctl = data; + if (data & (1ULL<<0)) + svm_enable_lbrv(svm); + else + svm_disable_lbrv(svm); break; case MSR_K7_EVNTSEL0: case MSR_K7_EVNTSEL1: @@ -1265,6 +1363,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, + [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, [SVM_EXIT_INTR] = nop_on_interception, [SVM_EXIT_NMI] = nop_on_interception, [SVM_EXIT_SMI] = nop_on_interception, @@ -1290,14 +1389,34 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_WBINVD] = emulate_on_interception, [SVM_EXIT_MONITOR] = invalid_op_interception, [SVM_EXIT_MWAIT] = invalid_op_interception, + [SVM_EXIT_NPF] = pf_interception, }; - static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); u32 exit_code = svm->vmcb->control.exit_code; + if (npt_enabled) { + int mmu_reload = 0; + if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { + svm_set_cr0(vcpu, svm->vmcb->save.cr0); + mmu_reload = 1; + } + vcpu->arch.cr0 = svm->vmcb->save.cr0; + vcpu->arch.cr3 = svm->vmcb->save.cr3; + if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { + if (!load_pdptrs(vcpu, vcpu->arch.cr3)) { + kvm_inject_gp(vcpu, 0); + return 1; + } + } + if (mmu_reload) { + kvm_mmu_reset_context(vcpu); + kvm_mmu_load(vcpu); + } + } + kvm_reput_irq(svm); if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { @@ -1308,10 +1427,11 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } if (is_external_interrupt(svm->vmcb->control.exit_int_info) && - exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) + exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && + exit_code != SVM_EXIT_NPF) printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " "exit_code 0x%x\n", - __FUNCTION__, svm->vmcb->control.exit_int_info, + __func__, svm->vmcb->control.exit_int_info, exit_code); if (exit_code >= ARRAY_SIZE(svm_exit_handlers) @@ -1364,6 +1484,27 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) svm_inject_irq(svm, irq); } +static void update_cr8_intercept(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *vmcb = svm->vmcb; + int max_irr, tpr; + + if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr) + return; + + vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; + + max_irr = kvm_lapic_find_highest_irr(vcpu); + if (max_irr == -1) + return; + + tpr = kvm_lapic_get_cr8(vcpu) << 4; + + if (tpr >= (max_irr & 0xf0)) + vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; +} + static void svm_intr_assist(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1376,14 +1517,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) SVM_EVTINJ_VEC_MASK; vmcb->control.exit_int_info = 0; svm_inject_irq(svm, intr_vector); - return; + goto out; } if (vmcb->control.int_ctl & V_IRQ_MASK) - return; + goto out; if (!kvm_cpu_has_interrupt(vcpu)) - return; + goto out; if (!(vmcb->save.rflags & X86_EFLAGS_IF) || (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || @@ -1391,12 +1532,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) /* unable to deliver irq, set pending irq */ vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); svm_inject_irq(svm, 0x0); - return; + goto out; } /* Okay, we can deliver the interrupt: grab it and update PIC state. */ intr_vector = kvm_cpu_get_interrupt(vcpu); svm_inject_irq(svm, intr_vector); kvm_timer_intr_post(vcpu, intr_vector); +out: + update_cr8_intercept(vcpu); } static void kvm_reput_irq(struct vcpu_svm *svm) @@ -1482,6 +1625,29 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) { } +static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { + int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; + kvm_lapic_set_tpr(vcpu, cr8); + } +} + +static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 cr8; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + cr8 = kvm_get_cr8(vcpu); + svm->vmcb->control.int_ctl &= ~V_TPR_MASK; + svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; +} + static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1491,6 +1657,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) pre_svm_run(svm); + sync_lapic_to_cr8(vcpu); + save_host_msrs(vcpu); fs_selector = read_fs(); gs_selector = read_gs(); @@ -1499,6 +1667,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) svm->host_dr6 = read_dr6(); svm->host_dr7 = read_dr7(); svm->vmcb->save.cr2 = vcpu->arch.cr2; + /* required for live migration with NPT */ + if (npt_enabled) + svm->vmcb->save.cr3 = vcpu->arch.cr3; if (svm->vmcb->save.dr7 & 0xff) { write_dr7(0); @@ -1635,6 +1806,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) stgi(); + sync_cr8_to_lapic(vcpu); + svm->next_rip = 0; } @@ -1642,6 +1815,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); + if (npt_enabled) { + svm->vmcb->control.nested_cr3 = root; + force_new_asid(vcpu); + return; + } + svm->vmcb->save.cr3 = root; force_new_asid(vcpu); @@ -1709,6 +1888,7 @@ static struct kvm_x86_ops svm_x86_ops = { .get_segment_base = svm_get_segment_base, .get_segment = svm_get_segment, .set_segment = svm_set_segment, + .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, .set_cr0 = svm_set_cr0, diff --git a/arch/x86/kvm/svm.h b/arch/x86/kvm/svm.h index 5fd50491b55..1b8afa78e86 100644 --- a/arch/x86/kvm/svm.h +++ b/arch/x86/kvm/svm.h @@ -238,6 +238,9 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID #define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR +#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 +#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 + #define SVM_EXIT_READ_CR0 0x000 #define SVM_EXIT_READ_CR3 0x003 #define SVM_EXIT_READ_CR4 0x004 diff --git a/arch/x86/kvm/tss.h b/arch/x86/kvm/tss.h new file mode 100644 index 00000000000..622aa10f692 --- /dev/null +++ b/arch/x86/kvm/tss.h @@ -0,0 +1,59 @@ +#ifndef __TSS_SEGMENT_H +#define __TSS_SEGMENT_H + +struct tss_segment_32 { + u32 prev_task_link; + u32 esp0; + u32 ss0; + u32 esp1; + u32 ss1; + u32 esp2; + u32 ss2; + u32 cr3; + u32 eip; + u32 eflags; + u32 eax; + u32 ecx; + u32 edx; + u32 ebx; + u32 esp; + u32 ebp; + u32 esi; + u32 edi; + u32 es; + u32 cs; + u32 ss; + u32 ds; + u32 fs; + u32 gs; + u32 ldt_selector; + u16 t; + u16 io_map; +}; + +struct tss_segment_16 { + u16 prev_task_link; + u16 sp0; + u16 ss0; + u16 sp1; + u16 ss1; + u16 sp2; + u16 ss2; + u16 ip; + u16 flag; + u16 ax; + u16 cx; + u16 dx; + u16 bx; + u16 sp; + u16 bp; + u16 si; + u16 di; + u16 es; + u16 cs; + u16 ss; + u16 ds; + u16 ldt; +}; + +#endif diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8e1462880d1..8e5d6645b90 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -17,7 +17,6 @@ #include "irq.h" #include "vmx.h" -#include "segment_descriptor.h" #include "mmu.h" #include <linux/kvm_host.h> @@ -37,6 +36,12 @@ MODULE_LICENSE("GPL"); static int bypass_guest_pf = 1; module_param(bypass_guest_pf, bool, 0); +static int enable_vpid = 1; +module_param(enable_vpid, bool, 0); + +static int flexpriority_enabled = 1; +module_param(flexpriority_enabled, bool, 0); + struct vmcs { u32 revision_id; u32 abort; @@ -71,6 +76,7 @@ struct vcpu_vmx { unsigned rip; } irq; } rmode; + int vpid; }; static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) @@ -85,6 +91,10 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); static struct page *vmx_io_bitmap_a; static struct page *vmx_io_bitmap_b; +static struct page *vmx_msr_bitmap; + +static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); +static DEFINE_SPINLOCK(vmx_vpid_lock); static struct vmcs_config { int size; @@ -176,6 +186,11 @@ static inline int is_external_interrupt(u32 intr_info) == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } +static inline int cpu_has_vmx_msr_bitmap(void) +{ + return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS); +} + static inline int cpu_has_vmx_tpr_shadow(void) { return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); @@ -194,8 +209,9 @@ static inline int cpu_has_secondary_exec_ctrls(void) static inline bool cpu_has_vmx_virtualize_apic_accesses(void) { - return (vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); + return flexpriority_enabled + && (vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); } static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) @@ -204,6 +220,12 @@ static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) (irqchip_in_kernel(kvm))); } +static inline int cpu_has_vmx_vpid(void) +{ + return (vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_VPID); +} + static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -214,6 +236,20 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) return -1; } +static inline void __invvpid(int ext, u16 vpid, gva_t gva) +{ + struct { + u64 vpid : 16; + u64 rsvd : 48; + u64 gva; + } operand = { vpid, 0, gva }; + + asm volatile (ASM_VMX_INVVPID + /* CF==1 or ZF==1 --> rc = -1 */ + "; ja 1f ; ud2 ; 1:" + : : "a"(&operand), "c"(ext) : "cc", "memory"); +} + static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -257,6 +293,14 @@ static void vcpu_clear(struct vcpu_vmx *vmx) vmx->launched = 0; } +static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) +{ + if (vmx->vpid == 0) + return; + + __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); +} + static unsigned long vmcs_readl(unsigned long field) { unsigned long value; @@ -353,7 +397,7 @@ static void reload_tss(void) * VT restores TR but not its size. Useless. */ struct descriptor_table gdt; - struct segment_descriptor *descs; + struct desc_struct *descs; get_gdt(&gdt); descs = (void *)gdt.base; @@ -485,11 +529,12 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 phys_addr = __pa(vmx->vmcs); - u64 tsc_this, delta; + u64 tsc_this, delta, new_offset; if (vcpu->cpu != cpu) { vcpu_clear(vmx); kvm_migrate_apic_timer(vcpu); + vpid_sync_vcpu_all(vmx); } if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { @@ -524,8 +569,11 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * Make sure the time stamp counter is monotonous. */ rdtscll(tsc_this); - delta = vcpu->arch.host_tsc - tsc_this; - vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta); + if (tsc_this < vcpu->arch.host_tsc) { + delta = vcpu->arch.host_tsc - tsc_this; + new_offset = vmcs_read64(TSC_OFFSET) + delta; + vmcs_write64(TSC_OFFSET, new_offset); + } } } @@ -596,7 +644,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, { vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, nr | INTR_TYPE_EXCEPTION - | (has_error_code ? INTR_INFO_DELIEVER_CODE_MASK : 0) + | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) | INTR_INFO_VALID_MASK); if (has_error_code) vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); @@ -959,6 +1007,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING; opt = CPU_BASED_TPR_SHADOW | + CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, &_cpu_based_exec_control) < 0) @@ -971,7 +1020,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { min = 0; opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_WBINVD_EXITING; + SECONDARY_EXEC_WBINVD_EXITING | + SECONDARY_EXEC_ENABLE_VPID; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) return -EIO; @@ -1080,6 +1130,10 @@ static __init int hardware_setup(void) { if (setup_vmcs_config(&vmcs_config) < 0) return -EIO; + + if (boot_cpu_has(X86_FEATURE_NX)) + kvm_enable_efer_bits(EFER_NX); + return alloc_kvm_area(); } @@ -1214,7 +1268,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu) guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { printk(KERN_DEBUG "%s: tss fixup for long mode. \n", - __FUNCTION__); + __func__); vmcs_write32(GUEST_TR_AR_BYTES, (guest_tr_ar & ~AR_TYPE_MASK) | AR_TYPE_BUSY_64_TSS); @@ -1239,6 +1293,11 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif +static void vmx_flush_tlb(struct kvm_vcpu *vcpu) +{ + vpid_sync_vcpu_all(to_vmx(vcpu)); +} + static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) { vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; @@ -1275,6 +1334,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { + vmx_flush_tlb(vcpu); vmcs_writel(GUEST_CR3, cr3); if (vcpu->arch.cr0 & X86_CR0_PE) vmx_fpu_deactivate(vcpu); @@ -1288,14 +1348,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vcpu->arch.cr4 = cr4; } -#ifdef CONFIG_X86_64 - static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); vcpu->arch.shadow_efer = efer; + if (!msr) + return; if (efer & EFER_LMA) { vmcs_write32(VM_ENTRY_CONTROLS, vmcs_read32(VM_ENTRY_CONTROLS) | @@ -1312,8 +1372,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) setup_msrs(vmx); } -#endif - static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -1344,6 +1402,20 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->unusable = (ar >> 16) & 1; } +static int vmx_get_cpl(struct kvm_vcpu *vcpu) +{ + struct kvm_segment kvm_seg; + + if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ + return 0; + + if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ + return 3; + + vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS); + return kvm_seg.selector & 3; +} + static u32 vmx_segment_access_rights(struct kvm_segment *var) { u32 ar; @@ -1433,7 +1505,6 @@ static int init_rmode_tss(struct kvm *kvm) int ret = 0; int r; - down_read(&kvm->slots_lock); r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); if (r < 0) goto out; @@ -1456,7 +1527,6 @@ static int init_rmode_tss(struct kvm *kvm) ret = 1; out: - up_read(&kvm->slots_lock); return ret; } @@ -1494,6 +1564,46 @@ out: return r; } +static void allocate_vpid(struct vcpu_vmx *vmx) +{ + int vpid; + + vmx->vpid = 0; + if (!enable_vpid || !cpu_has_vmx_vpid()) + return; + spin_lock(&vmx_vpid_lock); + vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); + if (vpid < VMX_NR_VPIDS) { + vmx->vpid = vpid; + __set_bit(vpid, vmx_vpid_bitmap); + } + spin_unlock(&vmx_vpid_lock); +} + +void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) +{ + void *va; + + if (!cpu_has_vmx_msr_bitmap()) + return; + + /* + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals + * have the write-low and read-high bitmap offsets the wrong way round. + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. + */ + va = kmap(msr_bitmap); + if (msr <= 0x1fff) { + __clear_bit(msr, va + 0x000); /* read-low */ + __clear_bit(msr, va + 0x800); /* write-low */ + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + __clear_bit(msr, va + 0x400); /* read-high */ + __clear_bit(msr, va + 0xc00); /* write-high */ + } + kunmap(msr_bitmap); +} + /* * Sets up the vmcs for emulated real mode. */ @@ -1511,6 +1621,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap)); + vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ /* Control */ @@ -1532,6 +1645,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + if (vmx->vpid == 0) + exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); } @@ -1613,6 +1728,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) u64 msr; int ret; + down_read(&vcpu->kvm->slots_lock); if (!init_rmode_tss(vmx->vcpu.kvm)) { ret = -ENOMEM; goto out; @@ -1621,7 +1737,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.rmode.active = 0; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); - set_cr8(&vmx->vcpu, 0); + kvm_set_cr8(&vmx->vcpu, 0); msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; if (vmx->vcpu.vcpu_id == 0) msr |= MSR_IA32_APICBASE_BSP; @@ -1704,18 +1820,22 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); + if (vmx->vpid != 0) + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); + vmx->vcpu.arch.cr0 = 0x60000010; vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ vmx_set_cr4(&vmx->vcpu, 0); -#ifdef CONFIG_X86_64 vmx_set_efer(&vmx->vcpu, 0); -#endif vmx_fpu_activate(&vmx->vcpu); update_exception_bitmap(&vmx->vcpu); - return 0; + vpid_sync_vcpu_all(vmx); + + ret = 0; out: + up_read(&vcpu->kvm->slots_lock); return ret; } @@ -1723,6 +1843,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) { struct vcpu_vmx *vmx = to_vmx(vcpu); + KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); + if (vcpu->arch.rmode.active) { vmx->rmode.irq.pending = true; vmx->rmode.irq.vector = irq; @@ -1844,7 +1966,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if ((vect_info & VECTORING_INFO_VALID_MASK) && !is_page_fault(intr_info)) printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " - "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); + "intr info 0x%x\n", __func__, vect_info, intr_info); if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { int irq = vect_info & VECTORING_INFO_VECTOR_MASK; @@ -1869,10 +1991,12 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) error_code = 0; rip = vmcs_readl(GUEST_RIP); - if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) + if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { cr2 = vmcs_readl(EXIT_QUALIFICATION); + KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, + (u32)((u64)cr2 >> 32), handler); return kvm_mmu_page_fault(vcpu, cr2, error_code); } @@ -1901,6 +2025,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { ++vcpu->stat.irq_exits; + KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler); return 1; } @@ -1958,25 +2083,27 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) reg = (exit_qualification >> 8) & 15; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ + KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg], + (u32)((u64)vcpu->arch.regs[reg] >> 32), handler); switch (cr) { case 0: vcpu_load_rsp_rip(vcpu); - set_cr0(vcpu, vcpu->arch.regs[reg]); + kvm_set_cr0(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); return 1; case 3: vcpu_load_rsp_rip(vcpu); - set_cr3(vcpu, vcpu->arch.regs[reg]); + kvm_set_cr3(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); return 1; case 4: vcpu_load_rsp_rip(vcpu); - set_cr4(vcpu, vcpu->arch.regs[reg]); + kvm_set_cr4(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); return 1; case 8: vcpu_load_rsp_rip(vcpu); - set_cr8(vcpu, vcpu->arch.regs[reg]); + kvm_set_cr8(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); if (irqchip_in_kernel(vcpu->kvm)) return 1; @@ -1990,6 +2117,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.cr0 &= ~X86_CR0_TS; vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); vmx_fpu_activate(vcpu); + KVMTRACE_0D(CLTS, vcpu, handler); skip_emulated_instruction(vcpu); return 1; case 1: /*mov from cr*/ @@ -1998,18 +2126,24 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load_rsp_rip(vcpu); vcpu->arch.regs[reg] = vcpu->arch.cr3; vcpu_put_rsp_rip(vcpu); + KVMTRACE_3D(CR_READ, vcpu, (u32)cr, + (u32)vcpu->arch.regs[reg], + (u32)((u64)vcpu->arch.regs[reg] >> 32), + handler); skip_emulated_instruction(vcpu); return 1; case 8: vcpu_load_rsp_rip(vcpu); - vcpu->arch.regs[reg] = get_cr8(vcpu); + vcpu->arch.regs[reg] = kvm_get_cr8(vcpu); vcpu_put_rsp_rip(vcpu); + KVMTRACE_2D(CR_READ, vcpu, (u32)cr, + (u32)vcpu->arch.regs[reg], handler); skip_emulated_instruction(vcpu); return 1; } break; case 3: /* lmsw */ - lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); + kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); skip_emulated_instruction(vcpu); return 1; @@ -2049,6 +2183,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) val = 0; } vcpu->arch.regs[reg] = val; + KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); } else { /* mov to dr */ } @@ -2073,6 +2208,9 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } + KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32), + handler); + /* FIXME: handling of bits 32:63 of rax, rdx */ vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u; vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u; @@ -2086,6 +2224,9 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); + KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32), + handler); + if (vmx_set_msr(vcpu, ecx, data) != 0) { kvm_inject_gp(vcpu, 0); return 1; @@ -2110,6 +2251,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + + KVMTRACE_0D(PEND_INTR, vcpu, handler); + /* * If the user space waits to inject interrupts, exit as soon as * possible @@ -2152,6 +2296,8 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) exit_qualification = vmcs_read64(EXIT_QUALIFICATION); offset = exit_qualification & 0xffful; + KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler); + er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); if (er != EMULATE_DONE) { @@ -2163,6 +2309,20 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } +static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + unsigned long exit_qualification; + u16 tss_selector; + int reason; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + reason = (u32)exit_qualification >> 30; + tss_selector = exit_qualification; + + return kvm_task_switch(vcpu, tss_selector, reason); +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -2185,6 +2345,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_WBINVD] = handle_wbinvd, + [EXIT_REASON_TASK_SWITCH] = handle_task_switch, }; static const int kvm_vmx_max_exit_handlers = @@ -2200,6 +2361,9 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); u32 vectoring_info = vmx->idt_vectoring_info; + KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), + (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); + if (unlikely(vmx->fail)) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason @@ -2210,7 +2374,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if ((vectoring_info & VECTORING_INFO_VALID_MASK) && exit_reason != EXIT_REASON_EXCEPTION_NMI) printk(KERN_WARNING "%s: unexpected, valid vectoring info and " - "exit reason is 0x%x\n", __FUNCTION__, exit_reason); + "exit reason is 0x%x\n", __func__, exit_reason); if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); @@ -2221,10 +2385,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return 0; } -static void vmx_flush_tlb(struct kvm_vcpu *vcpu) -{ -} - static void update_tpr_threshold(struct kvm_vcpu *vcpu) { int max_irr, tpr; @@ -2285,11 +2445,13 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) return; } + KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); - if (unlikely(idtv_info_field & INTR_INFO_DELIEVER_CODE_MASK)) + if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, vmcs_read32(IDT_VECTORING_ERROR_CODE)); if (unlikely(has_ext_irq)) @@ -2470,8 +2632,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* We need to handle NMIs before interrupts are enabled */ - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ + KVMTRACE_0D(NMI, vcpu, handler); asm("int $2"); + } } static void vmx_free_vmcs(struct kvm_vcpu *vcpu) @@ -2489,6 +2653,10 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + spin_lock(&vmx_vpid_lock); + if (vmx->vpid != 0) + __clear_bit(vmx->vpid, vmx_vpid_bitmap); + spin_unlock(&vmx_vpid_lock); vmx_free_vmcs(vcpu); kfree(vmx->host_msrs); kfree(vmx->guest_msrs); @@ -2505,6 +2673,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx) return ERR_PTR(-ENOMEM); + allocate_vpid(vmx); + err = kvm_vcpu_init(&vmx->vcpu, kvm, id); if (err) goto free_vcpu; @@ -2591,14 +2761,13 @@ static struct kvm_x86_ops vmx_x86_ops = { .get_segment_base = vmx_get_segment_base, .get_segment = vmx_get_segment, .set_segment = vmx_set_segment, + .get_cpl = vmx_get_cpl, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, .set_cr0 = vmx_set_cr0, .set_cr3 = vmx_set_cr3, .set_cr4 = vmx_set_cr4, -#ifdef CONFIG_X86_64 .set_efer = vmx_set_efer, -#endif .get_idt = vmx_get_idt, .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, @@ -2626,7 +2795,7 @@ static struct kvm_x86_ops vmx_x86_ops = { static int __init vmx_init(void) { - void *iova; + void *va; int r; vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); @@ -2639,28 +2808,48 @@ static int __init vmx_init(void) goto out; } + vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!vmx_msr_bitmap) { + r = -ENOMEM; + goto out1; + } + /* * Allow direct access to the PC debug port (it is often used for I/O * delays, but the vmexits simply slow things down). */ - iova = kmap(vmx_io_bitmap_a); - memset(iova, 0xff, PAGE_SIZE); - clear_bit(0x80, iova); + va = kmap(vmx_io_bitmap_a); + memset(va, 0xff, PAGE_SIZE); + clear_bit(0x80, va); kunmap(vmx_io_bitmap_a); - iova = kmap(vmx_io_bitmap_b); - memset(iova, 0xff, PAGE_SIZE); + va = kmap(vmx_io_bitmap_b); + memset(va, 0xff, PAGE_SIZE); kunmap(vmx_io_bitmap_b); + va = kmap(vmx_msr_bitmap); + memset(va, 0xff, PAGE_SIZE); + kunmap(vmx_msr_bitmap); + + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); if (r) - goto out1; + goto out2; + + vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE); + vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE); + vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS); + vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); + vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); if (bypass_guest_pf) kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); return 0; +out2: + __free_page(vmx_msr_bitmap); out1: __free_page(vmx_io_bitmap_b); out: @@ -2670,6 +2859,7 @@ out: static void __exit vmx_exit(void) { + __free_page(vmx_msr_bitmap); __free_page(vmx_io_bitmap_b); __free_page(vmx_io_bitmap_a); diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index d52ae8d7303..5dff4606b98 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -49,6 +49,7 @@ * Definitions of Secondary Processor-Based VM-Execution Controls. */ #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 @@ -65,6 +66,7 @@ /* VMCS Encodings */ enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR = 0x00000802, GUEST_SS_SELECTOR = 0x00000804, @@ -231,12 +233,12 @@ enum vmcs_field { */ #define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ #define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ -#define INTR_INFO_DELIEVER_CODE_MASK 0x800 /* 11 */ +#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ #define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ #define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK #define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK -#define VECTORING_INFO_DELIEVER_CODE_MASK INTR_INFO_DELIEVER_CODE_MASK +#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ @@ -321,4 +323,8 @@ enum vmcs_field { #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 +#define VMX_NR_VPIDS (1 << 16) +#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 +#define VMX_VPID_EXTENT_ALL_CONTEXT 2 + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6b01552bd1f..0ce556372a4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -15,10 +15,12 @@ */ #include <linux/kvm_host.h> -#include "segment_descriptor.h" #include "irq.h" #include "mmu.h" +#include "i8254.h" +#include "tss.h" +#include <linux/clocksource.h> #include <linux/kvm.h> #include <linux/fs.h> #include <linux/vmalloc.h> @@ -28,6 +30,7 @@ #include <asm/uaccess.h> #include <asm/msr.h> +#include <asm/desc.h> #define MAX_IO_MSRS 256 #define CR0_RESERVED_BITS \ @@ -41,7 +44,15 @@ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) -#define EFER_RESERVED_BITS 0xfffffffffffff2fe +/* EFER defaults: + * - enable syscall per default because its emulated by KVM + * - enable LME and LMA per default on 64 bit KVM + */ +#ifdef CONFIG_X86_64 +static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL; +#else +static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; +#endif #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU @@ -63,6 +74,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "irq_window", VCPU_STAT(irq_window_exits) }, { "halt_exits", VCPU_STAT(halt_exits) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, + { "hypercalls", VCPU_STAT(hypercalls) }, { "request_irq", VCPU_STAT(request_irq_exits) }, { "irq_exits", VCPU_STAT(irq_exits) }, { "host_state_reload", VCPU_STAT(host_state_reload) }, @@ -78,6 +90,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_recycled", VM_STAT(mmu_recycled) }, { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, + { "largepages", VM_STAT(lpages) }, { NULL } }; @@ -85,7 +98,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { unsigned long segment_base(u16 selector) { struct descriptor_table gdt; - struct segment_descriptor *d; + struct desc_struct *d; unsigned long table_base; unsigned long v; @@ -101,13 +114,12 @@ unsigned long segment_base(u16 selector) asm("sldt %0" : "=g"(ldt_selector)); table_base = segment_base(ldt_selector); } - d = (struct segment_descriptor *)(table_base + (selector & ~7)); - v = d->base_low | ((unsigned long)d->base_mid << 16) | - ((unsigned long)d->base_high << 24); + d = (struct desc_struct *)(table_base + (selector & ~7)); + v = d->base0 | ((unsigned long)d->base1 << 16) | + ((unsigned long)d->base2 << 24); #ifdef CONFIG_X86_64 - if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) - v |= ((unsigned long) \ - ((struct segment_descriptor_64 *)d)->base_higher) << 32; + if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) + v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; #endif return v; } @@ -145,11 +157,16 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, u32 error_code) { ++vcpu->stat.pf_guest; - if (vcpu->arch.exception.pending && vcpu->arch.exception.nr == PF_VECTOR) { - printk(KERN_DEBUG "kvm: inject_page_fault:" - " double fault 0x%lx\n", addr); - vcpu->arch.exception.nr = DF_VECTOR; - vcpu->arch.exception.error_code = 0; + if (vcpu->arch.exception.pending) { + if (vcpu->arch.exception.nr == PF_VECTOR) { + printk(KERN_DEBUG "kvm: inject_page_fault:" + " double fault 0x%lx\n", addr); + vcpu->arch.exception.nr = DF_VECTOR; + vcpu->arch.exception.error_code = 0; + } else if (vcpu->arch.exception.nr == DF_VECTOR) { + /* triple fault -> shutdown */ + set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); + } return; } vcpu->arch.cr2 = addr; @@ -184,7 +201,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) int ret; u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; - down_read(&vcpu->kvm->slots_lock); ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, offset * sizeof(u64), sizeof(pdpte)); if (ret < 0) { @@ -201,10 +217,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); out: - up_read(&vcpu->kvm->slots_lock); return ret; } +EXPORT_SYMBOL_GPL(load_pdptrs); static bool pdptrs_changed(struct kvm_vcpu *vcpu) { @@ -215,18 +231,16 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) if (is_long_mode(vcpu) || !is_pae(vcpu)) return false; - down_read(&vcpu->kvm->slots_lock); r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); if (r < 0) goto out; changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; out: - up_read(&vcpu->kvm->slots_lock); return changed; } -void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) +void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { if (cr0 & CR0_RESERVED_BITS) { printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", @@ -284,15 +298,18 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_mmu_reset_context(vcpu); return; } -EXPORT_SYMBOL_GPL(set_cr0); +EXPORT_SYMBOL_GPL(kvm_set_cr0); -void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) +void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) { - set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); + kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); + KVMTRACE_1D(LMSW, vcpu, + (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)), + handler); } -EXPORT_SYMBOL_GPL(lmsw); +EXPORT_SYMBOL_GPL(kvm_lmsw); -void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { if (cr4 & CR4_RESERVED_BITS) { printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); @@ -323,9 +340,9 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vcpu->arch.cr4 = cr4; kvm_mmu_reset_context(vcpu); } -EXPORT_SYMBOL_GPL(set_cr4); +EXPORT_SYMBOL_GPL(kvm_set_cr4); -void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) +void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { kvm_mmu_flush_tlb(vcpu); @@ -359,7 +376,6 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) */ } - down_read(&vcpu->kvm->slots_lock); /* * Does the new cr3 value map to physical memory? (Note, we * catch an invalid cr3 even in real-mode, because it would @@ -375,11 +391,10 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) vcpu->arch.cr3 = cr3; vcpu->arch.mmu.new_cr3(vcpu); } - up_read(&vcpu->kvm->slots_lock); } -EXPORT_SYMBOL_GPL(set_cr3); +EXPORT_SYMBOL_GPL(kvm_set_cr3); -void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) +void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { if (cr8 & CR8_RESERVED_BITS) { printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); @@ -391,16 +406,16 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) else vcpu->arch.cr8 = cr8; } -EXPORT_SYMBOL_GPL(set_cr8); +EXPORT_SYMBOL_GPL(kvm_set_cr8); -unsigned long get_cr8(struct kvm_vcpu *vcpu) +unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) { if (irqchip_in_kernel(vcpu->kvm)) return kvm_lapic_get_cr8(vcpu); else return vcpu->arch.cr8; } -EXPORT_SYMBOL_GPL(get_cr8); +EXPORT_SYMBOL_GPL(kvm_get_cr8); /* * List of msr numbers which we expose to userspace through KVM_GET_MSRS @@ -415,7 +430,8 @@ static u32 msrs_to_save[] = { #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TIME_STAMP_COUNTER, + MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, + MSR_IA32_PERF_STATUS, }; static unsigned num_msrs_to_save; @@ -424,11 +440,9 @@ static u32 emulated_msrs[] = { MSR_IA32_MISC_ENABLE, }; -#ifdef CONFIG_X86_64 - static void set_efer(struct kvm_vcpu *vcpu, u64 efer) { - if (efer & EFER_RESERVED_BITS) { + if (efer & efer_reserved_bits) { printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", efer); kvm_inject_gp(vcpu, 0); @@ -450,7 +464,12 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) vcpu->arch.shadow_efer = efer; } -#endif +void kvm_enable_efer_bits(u64 mask) +{ + efer_reserved_bits &= ~mask; +} +EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); + /* * Writes msr value into into the appropriate "register". @@ -470,26 +489,86 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) return kvm_set_msr(vcpu, index, *data); } +static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) +{ + static int version; + struct kvm_wall_clock wc; + struct timespec wc_ts; + + if (!wall_clock) + return; + + version++; + + kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); + + wc_ts = current_kernel_time(); + wc.wc_sec = wc_ts.tv_sec; + wc.wc_nsec = wc_ts.tv_nsec; + wc.wc_version = version; + + kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); + + version++; + kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); +} + +static void kvm_write_guest_time(struct kvm_vcpu *v) +{ + struct timespec ts; + unsigned long flags; + struct kvm_vcpu_arch *vcpu = &v->arch; + void *shared_kaddr; + + if ((!vcpu->time_page)) + return; + + /* Keep irq disabled to prevent changes to the clock */ + local_irq_save(flags); + kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, + &vcpu->hv_clock.tsc_timestamp); + ktime_get_ts(&ts); + local_irq_restore(flags); + + /* With all the info we got, fill in the values */ + + vcpu->hv_clock.system_time = ts.tv_nsec + + (NSEC_PER_SEC * (u64)ts.tv_sec); + /* + * The interface expects us to write an even number signaling that the + * update is finished. Since the guest won't see the intermediate + * state, we just write "2" at the end + */ + vcpu->hv_clock.version = 2; + + shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); + + memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); + + kunmap_atomic(shared_kaddr, KM_USER0); + + mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); +} + int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) { switch (msr) { -#ifdef CONFIG_X86_64 case MSR_EFER: set_efer(vcpu, data); break; -#endif case MSR_IA32_MC0_STATUS: pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", - __FUNCTION__, data); + __func__, data); break; case MSR_IA32_MCG_STATUS: pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", - __FUNCTION__, data); + __func__, data); break; case MSR_IA32_MCG_CTL: pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", - __FUNCTION__, data); + __func__, data); break; case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: @@ -501,6 +580,42 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) case MSR_IA32_MISC_ENABLE: vcpu->arch.ia32_misc_enable_msr = data; break; + case MSR_KVM_WALL_CLOCK: + vcpu->kvm->arch.wall_clock = data; + kvm_write_wall_clock(vcpu->kvm, data); + break; + case MSR_KVM_SYSTEM_TIME: { + if (vcpu->arch.time_page) { + kvm_release_page_dirty(vcpu->arch.time_page); + vcpu->arch.time_page = NULL; + } + + vcpu->arch.time = data; + + /* we verify if the enable bit is set... */ + if (!(data & 1)) + break; + + /* ...but clean it before doing the actual write */ + vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); + + vcpu->arch.hv_clock.tsc_to_system_mul = + clocksource_khz2mult(tsc_khz, 22); + vcpu->arch.hv_clock.tsc_shift = 22; + + down_read(¤t->mm->mmap_sem); + vcpu->arch.time_page = + gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); + up_read(¤t->mm->mmap_sem); + + if (is_error_page(vcpu->arch.time_page)) { + kvm_release_page_clean(vcpu->arch.time_page); + vcpu->arch.time_page = NULL; + } + + kvm_write_guest_time(vcpu); + break; + } default: pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); return 1; @@ -540,7 +655,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MC0_MISC+12: case MSR_IA32_MC0_MISC+16: case MSR_IA32_UCODE_REV: - case MSR_IA32_PERF_STATUS: case MSR_IA32_EBL_CR_POWERON: /* MTRR registers */ case 0xfe: @@ -556,11 +670,21 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MISC_ENABLE: data = vcpu->arch.ia32_misc_enable_msr; break; -#ifdef CONFIG_X86_64 + case MSR_IA32_PERF_STATUS: + /* TSC increment by tick */ + data = 1000ULL; + /* CPU multiplier */ + data |= (((uint64_t)4ULL) << 40); + break; case MSR_EFER: data = vcpu->arch.shadow_efer; break; -#endif + case MSR_KVM_WALL_CLOCK: + data = vcpu->kvm->arch.wall_clock; + break; + case MSR_KVM_SYSTEM_TIME: + data = vcpu->arch.time; + break; default: pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); return 1; @@ -584,9 +708,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, vcpu_load(vcpu); + down_read(&vcpu->kvm->slots_lock); for (i = 0; i < msrs->nmsrs; ++i) if (do_msr(vcpu, entries[i].index, &entries[i].data)) break; + up_read(&vcpu->kvm->slots_lock); vcpu_put(vcpu); @@ -688,11 +814,24 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_USER_MEMORY: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_CLOCKSOURCE: + case KVM_CAP_PIT: + case KVM_CAP_NOP_IO_DELAY: + case KVM_CAP_MP_STATE: r = 1; break; case KVM_CAP_VAPIC: r = !kvm_x86_ops->cpu_has_accelerated_tpr(); break; + case KVM_CAP_NR_VCPUS: + r = KVM_MAX_VCPUS; + break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_MEMORY_SLOTS; + break; + case KVM_CAP_PV_MMU: + r = !tdp_enabled; + break; default: r = 0; break; @@ -763,6 +902,7 @@ out: void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvm_x86_ops->vcpu_load(vcpu, cpu); + kvm_write_guest_time(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -958,32 +1098,32 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } /* function 4 and 0xb have additional index. */ case 4: { - int index, cache_type; + int i, cache_type; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* read more entries until cache_type is zero */ - for (index = 1; *nent < maxnent; ++index) { - cache_type = entry[index - 1].eax & 0x1f; + for (i = 1; *nent < maxnent; ++i) { + cache_type = entry[i - 1].eax & 0x1f; if (!cache_type) break; - do_cpuid_1_ent(&entry[index], function, index); - entry[index].flags |= + do_cpuid_1_ent(&entry[i], function, i); + entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++*nent; } break; } case 0xb: { - int index, level_type; + int i, level_type; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* read more entries until level_type is zero */ - for (index = 1; *nent < maxnent; ++index) { - level_type = entry[index - 1].ecx & 0xff; + for (i = 1; *nent < maxnent; ++i) { + level_type = entry[i - 1].ecx & 0xff; if (!level_type) break; - do_cpuid_1_ent(&entry[index], function, index); - entry[index].flags |= + do_cpuid_1_ent(&entry[i], function, i); + entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++*nent; } @@ -1365,6 +1505,23 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) return r; } +static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) +{ + int r = 0; + + memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); + return r; +} + +static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) +{ + int r = 0; + + memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); + kvm_pit_load_count(kvm, 0, ps->channels[0].count); + return r; +} + /* * Get (and clear) the dirty memory log for a memory slot. */ @@ -1457,6 +1614,12 @@ long kvm_arch_vm_ioctl(struct file *filp, } else goto out; break; + case KVM_CREATE_PIT: + r = -ENOMEM; + kvm->arch.vpit = kvm_create_pit(kvm); + if (kvm->arch.vpit) + r = 0; + break; case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; @@ -1512,6 +1675,37 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_GET_PIT: { + struct kvm_pit_state ps; + r = -EFAULT; + if (copy_from_user(&ps, argp, sizeof ps)) + goto out; + r = -ENXIO; + if (!kvm->arch.vpit) + goto out; + r = kvm_vm_ioctl_get_pit(kvm, &ps); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &ps, sizeof ps)) + goto out; + r = 0; + break; + } + case KVM_SET_PIT: { + struct kvm_pit_state ps; + r = -EFAULT; + if (copy_from_user(&ps, argp, sizeof ps)) + goto out; + r = -ENXIO; + if (!kvm->arch.vpit) + goto out; + r = kvm_vm_ioctl_set_pit(kvm, &ps); + if (r) + goto out; + r = 0; + break; + } default: ; } @@ -1570,7 +1764,6 @@ int emulator_read_std(unsigned long addr, void *data = val; int r = X86EMUL_CONTINUE; - down_read(&vcpu->kvm->slots_lock); while (bytes) { gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); unsigned offset = addr & (PAGE_SIZE-1); @@ -1592,7 +1785,6 @@ int emulator_read_std(unsigned long addr, addr += tocopy; } out: - up_read(&vcpu->kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(emulator_read_std); @@ -1611,9 +1803,7 @@ static int emulator_read_emulated(unsigned long addr, return X86EMUL_CONTINUE; } - down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); - up_read(&vcpu->kvm->slots_lock); /* For APIC access vmexit */ if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) @@ -1646,19 +1836,15 @@ mmio: return X86EMUL_UNHANDLEABLE; } -static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, - const void *val, int bytes) +int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, + const void *val, int bytes) { int ret; - down_read(&vcpu->kvm->slots_lock); ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); - if (ret < 0) { - up_read(&vcpu->kvm->slots_lock); + if (ret < 0) return 0; - } kvm_mmu_pte_write(vcpu, gpa, val, bytes); - up_read(&vcpu->kvm->slots_lock); return 1; } @@ -1670,9 +1856,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, struct kvm_io_device *mmio_dev; gpa_t gpa; - down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); - up_read(&vcpu->kvm->slots_lock); if (gpa == UNMAPPED_GVA) { kvm_inject_page_fault(vcpu, addr, 2); @@ -1749,7 +1933,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr, char *kaddr; u64 val; - down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); if (gpa == UNMAPPED_GVA || @@ -1769,9 +1952,8 @@ static int emulator_cmpxchg_emulated(unsigned long addr, set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); kunmap_atomic(kaddr, KM_USER0); kvm_release_page_dirty(page); - emul_write: - up_read(&vcpu->kvm->slots_lock); } +emul_write: #endif return emulator_write_emulated(addr, new, bytes, vcpu); @@ -1802,7 +1984,7 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) *dest = kvm_x86_ops->get_dr(vcpu, dr); return X86EMUL_CONTINUE; default: - pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr); + pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr); return X86EMUL_UNHANDLEABLE; } } @@ -1840,7 +2022,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) } EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); -struct x86_emulate_ops emulate_ops = { +static struct x86_emulate_ops emulate_ops = { .read_std = emulator_read_std, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, @@ -2091,6 +2273,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.guest_page_offset = 0; vcpu->arch.pio.rep = 0; + if (vcpu->run->io.direction == KVM_EXIT_IO_IN) + KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size, + handler); + else + KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, + handler); + kvm_x86_ops->cache_regs(vcpu); memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); kvm_x86_ops->decache_regs(vcpu); @@ -2129,6 +2318,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.guest_page_offset = offset_in_page(address); vcpu->arch.pio.rep = rep; + if (vcpu->run->io.direction == KVM_EXIT_IO_IN) + KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size, + handler); + else + KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, + handler); + if (!count) { kvm_x86_ops->skip_emulated_instruction(vcpu); return 1; @@ -2163,10 +2359,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, kvm_x86_ops->skip_emulated_instruction(vcpu); for (i = 0; i < nr_pages; ++i) { - down_read(&vcpu->kvm->slots_lock); page = gva_to_page(vcpu, address + i * PAGE_SIZE); vcpu->arch.pio.guest_pages[i] = page; - up_read(&vcpu->kvm->slots_lock); if (!page) { kvm_inject_gp(vcpu, 0); free_pio_guest_pages(vcpu); @@ -2238,10 +2432,13 @@ void kvm_arch_exit(void) int kvm_emulate_halt(struct kvm_vcpu *vcpu) { ++vcpu->stat.halt_exits; + KVMTRACE_0D(HLT, vcpu, handler); if (irqchip_in_kernel(vcpu->kvm)) { - vcpu->arch.mp_state = VCPU_MP_STATE_HALTED; + vcpu->arch.mp_state = KVM_MP_STATE_HALTED; + up_read(&vcpu->kvm->slots_lock); kvm_vcpu_block(vcpu); - if (vcpu->arch.mp_state != VCPU_MP_STATE_RUNNABLE) + down_read(&vcpu->kvm->slots_lock); + if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) return -EINTR; return 1; } else { @@ -2251,9 +2448,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_halt); +static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, + unsigned long a1) +{ + if (is_long_mode(vcpu)) + return a0; + else + return a0 | ((gpa_t)a1 << 32); +} + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; + int r = 1; kvm_x86_ops->cache_regs(vcpu); @@ -2263,6 +2470,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) a2 = vcpu->arch.regs[VCPU_REGS_RDX]; a3 = vcpu->arch.regs[VCPU_REGS_RSI]; + KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler); + if (!is_long_mode(vcpu)) { nr &= 0xFFFFFFFF; a0 &= 0xFFFFFFFF; @@ -2275,13 +2484,17 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_VAPIC_POLL_IRQ: ret = 0; break; + case KVM_HC_MMU_OP: + r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret); + break; default: ret = -KVM_ENOSYS; break; } vcpu->arch.regs[VCPU_REGS_RAX] = ret; kvm_x86_ops->decache_regs(vcpu); - return 0; + ++vcpu->stat.hypercalls; + return r; } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); @@ -2329,7 +2542,7 @@ void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, unsigned long *rflags) { - lmsw(vcpu, msw); + kvm_lmsw(vcpu, msw); *rflags = kvm_x86_ops->get_rflags(vcpu); } @@ -2346,9 +2559,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) case 4: return vcpu->arch.cr4; case 8: - return get_cr8(vcpu); + return kvm_get_cr8(vcpu); default: - vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); return 0; } } @@ -2358,23 +2571,23 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, { switch (cr) { case 0: - set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); + kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); *rflags = kvm_x86_ops->get_rflags(vcpu); break; case 2: vcpu->arch.cr2 = val; break; case 3: - set_cr3(vcpu, val); + kvm_set_cr3(vcpu, val); break; case 4: - set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); + kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); break; case 8: - set_cr8(vcpu, val & 0xfUL); + kvm_set_cr8(vcpu, val & 0xfUL); break; default: - vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); } } @@ -2447,6 +2660,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) } kvm_x86_ops->decache_regs(vcpu); kvm_x86_ops->skip_emulated_instruction(vcpu); + KVMTRACE_5D(CPUID, vcpu, function, + (u32)vcpu->arch.regs[VCPU_REGS_RAX], + (u32)vcpu->arch.regs[VCPU_REGS_RBX], + (u32)vcpu->arch.regs[VCPU_REGS_RCX], + (u32)vcpu->arch.regs[VCPU_REGS_RDX], handler); } EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); @@ -2469,7 +2687,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; - kvm_run->cr8 = get_cr8(vcpu); + kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); if (irqchip_in_kernel(vcpu->kvm)) kvm_run->ready_for_interrupt_injection = 1; @@ -2509,16 +2727,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; - if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { pr_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, vcpu->arch.sipi_vector); kvm_lapic_reset(vcpu); r = kvm_x86_ops->vcpu_reset(vcpu); if (r) return r; - vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } + down_read(&vcpu->kvm->slots_lock); vapic_enter(vcpu); preempted: @@ -2526,6 +2745,10 @@ preempted: kvm_x86_ops->guest_debug_pre(vcpu); again: + if (vcpu->requests) + if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) + kvm_mmu_unload(vcpu); + r = kvm_mmu_reload(vcpu); if (unlikely(r)) goto out; @@ -2539,6 +2762,11 @@ again: r = 0; goto out; } + if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { + kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; + r = 0; + goto out; + } } kvm_inject_pending_timer_irqs(vcpu); @@ -2557,6 +2785,14 @@ again: goto out; } + if (vcpu->requests) + if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) { + local_irq_enable(); + preempt_enable(); + r = 1; + goto out; + } + if (signal_pending(current)) { local_irq_enable(); preempt_enable(); @@ -2566,6 +2802,13 @@ again: goto out; } + vcpu->guest_mode = 1; + /* + * Make sure that guest_mode assignment won't happen after + * testing the pending IRQ vector bitmap. + */ + smp_wmb(); + if (vcpu->arch.exception.pending) __queue_exception(vcpu); else if (irqchip_in_kernel(vcpu->kvm)) @@ -2575,13 +2818,15 @@ again: kvm_lapic_sync_to_vapic(vcpu); - vcpu->guest_mode = 1; + up_read(&vcpu->kvm->slots_lock); + kvm_guest_enter(); if (vcpu->requests) if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) kvm_x86_ops->tlb_flush(vcpu); + KVMTRACE_0D(VMENTRY, vcpu, entryexit); kvm_x86_ops->run(vcpu, kvm_run); vcpu->guest_mode = 0; @@ -2601,6 +2846,8 @@ again: preempt_enable(); + down_read(&vcpu->kvm->slots_lock); + /* * Profile KVM exit RIPs: */ @@ -2628,14 +2875,18 @@ again: } out: + up_read(&vcpu->kvm->slots_lock); if (r > 0) { kvm_resched(vcpu); + down_read(&vcpu->kvm->slots_lock); goto preempted; } post_kvm_run_save(vcpu, kvm_run); + down_read(&vcpu->kvm->slots_lock); vapic_exit(vcpu); + up_read(&vcpu->kvm->slots_lock); return r; } @@ -2647,7 +2898,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load(vcpu); - if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_UNINITIALIZED)) { + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { kvm_vcpu_block(vcpu); vcpu_put(vcpu); return -EAGAIN; @@ -2658,7 +2909,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* re-sync apic's tpr */ if (!irqchip_in_kernel(vcpu->kvm)) - set_cr8(vcpu, kvm_run->cr8); + kvm_set_cr8(vcpu, kvm_run->cr8); if (vcpu->arch.pio.cur_count) { r = complete_pio(vcpu); @@ -2670,9 +2921,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); vcpu->mmio_read_completed = 1; vcpu->mmio_needed = 0; + + down_read(&vcpu->kvm->slots_lock); r = emulate_instruction(vcpu, kvm_run, vcpu->arch.mmio_fault_cr2, 0, EMULTYPE_NO_DECODE); + up_read(&vcpu->kvm->slots_lock); if (r == EMULATE_DO_MMIO) { /* * Read-modify-write. Back to userspace. @@ -2773,7 +3027,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) static void get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { - return kvm_x86_ops->get_segment(vcpu, var, seg); + kvm_x86_ops->get_segment(vcpu, var, seg); } void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) @@ -2816,7 +3070,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->cr2 = vcpu->arch.cr2; sregs->cr3 = vcpu->arch.cr3; sregs->cr4 = vcpu->arch.cr4; - sregs->cr8 = get_cr8(vcpu); + sregs->cr8 = kvm_get_cr8(vcpu); sregs->efer = vcpu->arch.shadow_efer; sregs->apic_base = kvm_get_apic_base(vcpu); @@ -2836,12 +3090,438 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, return 0; } +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + vcpu_load(vcpu); + mp_state->mp_state = vcpu->arch.mp_state; + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + vcpu_load(vcpu); + vcpu->arch.mp_state = mp_state->mp_state; + vcpu_put(vcpu); + return 0; +} + static void set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { - return kvm_x86_ops->set_segment(vcpu, var, seg); + kvm_x86_ops->set_segment(vcpu, var, seg); +} + +static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, + struct kvm_segment *kvm_desct) +{ + kvm_desct->base = seg_desc->base0; + kvm_desct->base |= seg_desc->base1 << 16; + kvm_desct->base |= seg_desc->base2 << 24; + kvm_desct->limit = seg_desc->limit0; + kvm_desct->limit |= seg_desc->limit << 16; + kvm_desct->selector = selector; + kvm_desct->type = seg_desc->type; + kvm_desct->present = seg_desc->p; + kvm_desct->dpl = seg_desc->dpl; + kvm_desct->db = seg_desc->d; + kvm_desct->s = seg_desc->s; + kvm_desct->l = seg_desc->l; + kvm_desct->g = seg_desc->g; + kvm_desct->avl = seg_desc->avl; + if (!selector) + kvm_desct->unusable = 1; + else + kvm_desct->unusable = 0; + kvm_desct->padding = 0; +} + +static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, + u16 selector, + struct descriptor_table *dtable) +{ + if (selector & 1 << 2) { + struct kvm_segment kvm_seg; + + get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); + + if (kvm_seg.unusable) + dtable->limit = 0; + else + dtable->limit = kvm_seg.limit; + dtable->base = kvm_seg.base; + } + else + kvm_x86_ops->get_gdt(vcpu, dtable); +} + +/* allowed just for 8 bytes segments */ +static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + struct desc_struct *seg_desc) +{ + struct descriptor_table dtable; + u16 index = selector >> 3; + + get_segment_descritptor_dtable(vcpu, selector, &dtable); + + if (dtable.limit < index * 8 + 7) { + kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); + return 1; + } + return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); +} + +/* allowed just for 8 bytes segments */ +static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + struct desc_struct *seg_desc) +{ + struct descriptor_table dtable; + u16 index = selector >> 3; + + get_segment_descritptor_dtable(vcpu, selector, &dtable); + + if (dtable.limit < index * 8 + 7) + return 1; + return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); +} + +static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, + struct desc_struct *seg_desc) +{ + u32 base_addr; + + base_addr = seg_desc->base0; + base_addr |= (seg_desc->base1 << 16); + base_addr |= (seg_desc->base2 << 24); + + return base_addr; +} + +static int load_tss_segment32(struct kvm_vcpu *vcpu, + struct desc_struct *seg_desc, + struct tss_segment_32 *tss) +{ + u32 base_addr; + + base_addr = get_tss_base_addr(vcpu, seg_desc); + + return kvm_read_guest(vcpu->kvm, base_addr, tss, + sizeof(struct tss_segment_32)); +} + +static int save_tss_segment32(struct kvm_vcpu *vcpu, + struct desc_struct *seg_desc, + struct tss_segment_32 *tss) +{ + u32 base_addr; + + base_addr = get_tss_base_addr(vcpu, seg_desc); + + return kvm_write_guest(vcpu->kvm, base_addr, tss, + sizeof(struct tss_segment_32)); +} + +static int load_tss_segment16(struct kvm_vcpu *vcpu, + struct desc_struct *seg_desc, + struct tss_segment_16 *tss) +{ + u32 base_addr; + + base_addr = get_tss_base_addr(vcpu, seg_desc); + + return kvm_read_guest(vcpu->kvm, base_addr, tss, + sizeof(struct tss_segment_16)); +} + +static int save_tss_segment16(struct kvm_vcpu *vcpu, + struct desc_struct *seg_desc, + struct tss_segment_16 *tss) +{ + u32 base_addr; + + base_addr = get_tss_base_addr(vcpu, seg_desc); + + return kvm_write_guest(vcpu->kvm, base_addr, tss, + sizeof(struct tss_segment_16)); +} + +static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) +{ + struct kvm_segment kvm_seg; + + get_segment(vcpu, &kvm_seg, seg); + return kvm_seg.selector; +} + +static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, + u16 selector, + struct kvm_segment *kvm_seg) +{ + struct desc_struct seg_desc; + + if (load_guest_segment_descriptor(vcpu, selector, &seg_desc)) + return 1; + seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg); + return 0; +} + +static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + int type_bits, int seg) +{ + struct kvm_segment kvm_seg; + + if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) + return 1; + kvm_seg.type |= type_bits; + + if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS && + seg != VCPU_SREG_LDTR) + if (!kvm_seg.s) + kvm_seg.unusable = 1; + + set_segment(vcpu, &kvm_seg, seg); + return 0; +} + +static void save_state_to_tss32(struct kvm_vcpu *vcpu, + struct tss_segment_32 *tss) +{ + tss->cr3 = vcpu->arch.cr3; + tss->eip = vcpu->arch.rip; + tss->eflags = kvm_x86_ops->get_rflags(vcpu); + tss->eax = vcpu->arch.regs[VCPU_REGS_RAX]; + tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX]; + tss->edx = vcpu->arch.regs[VCPU_REGS_RDX]; + tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX]; + tss->esp = vcpu->arch.regs[VCPU_REGS_RSP]; + tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP]; + tss->esi = vcpu->arch.regs[VCPU_REGS_RSI]; + tss->edi = vcpu->arch.regs[VCPU_REGS_RDI]; + + tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); + tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); + tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); + tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); + tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); + tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); + tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); + tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); +} + +static int load_state_from_tss32(struct kvm_vcpu *vcpu, + struct tss_segment_32 *tss) +{ + kvm_set_cr3(vcpu, tss->cr3); + + vcpu->arch.rip = tss->eip; + kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); + + vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax; + vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx; + vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx; + vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx; + vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp; + vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp; + vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; + vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; + + if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) + return 1; + + if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) + return 1; + + if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) + return 1; + + if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) + return 1; + + if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) + return 1; + + if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) + return 1; + + if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) + return 1; + return 0; +} + +static void save_state_to_tss16(struct kvm_vcpu *vcpu, + struct tss_segment_16 *tss) +{ + tss->ip = vcpu->arch.rip; + tss->flag = kvm_x86_ops->get_rflags(vcpu); + tss->ax = vcpu->arch.regs[VCPU_REGS_RAX]; + tss->cx = vcpu->arch.regs[VCPU_REGS_RCX]; + tss->dx = vcpu->arch.regs[VCPU_REGS_RDX]; + tss->bx = vcpu->arch.regs[VCPU_REGS_RBX]; + tss->sp = vcpu->arch.regs[VCPU_REGS_RSP]; + tss->bp = vcpu->arch.regs[VCPU_REGS_RBP]; + tss->si = vcpu->arch.regs[VCPU_REGS_RSI]; + tss->di = vcpu->arch.regs[VCPU_REGS_RDI]; + + tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); + tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); + tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); + tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); + tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); + tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); +} + +static int load_state_from_tss16(struct kvm_vcpu *vcpu, + struct tss_segment_16 *tss) +{ + vcpu->arch.rip = tss->ip; + kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); + vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax; + vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx; + vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx; + vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx; + vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp; + vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp; + vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; + vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; + + if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) + return 1; + + if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) + return 1; + + if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) + return 1; + + if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) + return 1; + + if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) + return 1; + return 0; +} + +int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, + struct desc_struct *cseg_desc, + struct desc_struct *nseg_desc) +{ + struct tss_segment_16 tss_segment_16; + int ret = 0; + + if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16)) + goto out; + + save_state_to_tss16(vcpu, &tss_segment_16); + save_tss_segment16(vcpu, cseg_desc, &tss_segment_16); + + if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16)) + goto out; + if (load_state_from_tss16(vcpu, &tss_segment_16)) + goto out; + + ret = 1; +out: + return ret; +} + +int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, + struct desc_struct *cseg_desc, + struct desc_struct *nseg_desc) +{ + struct tss_segment_32 tss_segment_32; + int ret = 0; + + if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32)) + goto out; + + save_state_to_tss32(vcpu, &tss_segment_32); + save_tss_segment32(vcpu, cseg_desc, &tss_segment_32); + + if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32)) + goto out; + if (load_state_from_tss32(vcpu, &tss_segment_32)) + goto out; + + ret = 1; +out: + return ret; } +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) +{ + struct kvm_segment tr_seg; + struct desc_struct cseg_desc; + struct desc_struct nseg_desc; + int ret = 0; + + get_segment(vcpu, &tr_seg, VCPU_SREG_TR); + + if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) + goto out; + + if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc)) + goto out; + + + if (reason != TASK_SWITCH_IRET) { + int cpl; + + cpl = kvm_x86_ops->get_cpl(vcpu); + if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } + } + + if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) { + kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); + return 1; + } + + if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { + cseg_desc.type &= ~(1 << 8); //clear the B flag + save_guest_segment_descriptor(vcpu, tr_seg.selector, + &cseg_desc); + } + + if (reason == TASK_SWITCH_IRET) { + u32 eflags = kvm_x86_ops->get_rflags(vcpu); + kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); + } + + kvm_x86_ops->skip_emulated_instruction(vcpu); + kvm_x86_ops->cache_regs(vcpu); + + if (nseg_desc.type & 8) + ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc, + &nseg_desc); + else + ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc, + &nseg_desc); + + if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { + u32 eflags = kvm_x86_ops->get_rflags(vcpu); + kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT); + } + + if (reason != TASK_SWITCH_IRET) { + nseg_desc.type |= (1 << 8); + save_guest_segment_descriptor(vcpu, tss_selector, + &nseg_desc); + } + + kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); + seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); + tr_seg.type = 11; + set_segment(vcpu, &tr_seg, VCPU_SREG_TR); +out: + kvm_x86_ops->decache_regs(vcpu); + return ret; +} +EXPORT_SYMBOL_GPL(kvm_task_switch); + int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { @@ -2862,12 +3542,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; vcpu->arch.cr3 = sregs->cr3; - set_cr8(vcpu, sregs->cr8); + kvm_set_cr8(vcpu, sregs->cr8); mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; -#ifdef CONFIG_X86_64 kvm_x86_ops->set_efer(vcpu, sregs->efer); -#endif kvm_set_apic_base(vcpu, sregs->apic_base); kvm_x86_ops->decache_cr4_guest_bits(vcpu); @@ -3141,9 +3819,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.mmu.root_hpa = INVALID_PAGE; if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0) - vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; else - vcpu->arch.mp_state = VCPU_MP_STATE_UNINITIALIZED; + vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) { @@ -3175,7 +3853,9 @@ fail: void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { kvm_free_lapic(vcpu); + down_read(&vcpu->kvm->slots_lock); kvm_mmu_destroy(vcpu); + up_read(&vcpu->kvm->slots_lock); free_page((unsigned long)vcpu->arch.pio_data); } @@ -3219,10 +3899,13 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { + kvm_free_pit(kvm); kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); kvm_free_vcpus(kvm); kvm_free_physmem(kvm); + if (kvm->arch.apic_access_page) + put_page(kvm->arch.apic_access_page); kfree(kvm); } @@ -3278,8 +3961,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm, int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - return vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE - || vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED; + return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE + || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; } static void vcpu_kick_intr(void *info) @@ -3293,11 +3976,17 @@ static void vcpu_kick_intr(void *info) void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { int ipi_pcpu = vcpu->cpu; + int cpu = get_cpu(); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); ++vcpu->stat.halt_wakeup; } - if (vcpu->guest_mode) + /* + * We may be called synchronously with irqs disabled in guest mode, + * So need not to call smp_call_function_single() in that case. + */ + if (vcpu->guest_mode && vcpu->cpu != cpu) smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); + put_cpu(); } diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 79586003397..2ca08386f99 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -65,6 +65,14 @@ #define MemAbs (1<<9) /* Memory operand is absolute displacement */ #define String (1<<10) /* String instruction (rep capable) */ #define Stack (1<<11) /* Stack instruction (push/pop) */ +#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ +#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ +#define GroupMask 0xff /* Group number stored in bits 0:7 */ + +enum { + Group1_80, Group1_81, Group1_82, Group1_83, + Group1A, Group3_Byte, Group3, Group4, Group5, Group7, +}; static u16 opcode_table[256] = { /* 0x00 - 0x07 */ @@ -123,14 +131,14 @@ static u16 opcode_table[256] = { ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, /* 0x80 - 0x87 */ - ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, - ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, + Group | Group1_80, Group | Group1_81, + Group | Group1_82, Group | Group1_83, ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, /* 0x88 - 0x8F */ ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, - 0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov | Stack, + 0, ModRM | DstReg, 0, Group | Group1A, /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, @@ -164,16 +172,15 @@ static u16 opcode_table[256] = { 0, 0, 0, 0, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, - ImplicitOps, ImplicitOps, - ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, + ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, /* 0xF8 - 0xFF */ ImplicitOps, 0, ImplicitOps, ImplicitOps, - 0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM + 0, 0, Group | Group4, Group | Group5, }; static u16 twobyte_table[256] = { /* 0x00 - 0x0F */ - 0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0, + 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, /* 0x10 - 0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, @@ -229,6 +236,56 @@ static u16 twobyte_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +static u16 group_table[] = { + [Group1_80*8] = + ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, + ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, + ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, + ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, + [Group1_81*8] = + DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, + DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, + DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, + DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, + [Group1_82*8] = + ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, + ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, + ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, + ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, + [Group1_83*8] = + DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, + DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, + DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, + DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, + [Group1A*8] = + DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, + [Group3_Byte*8] = + ByteOp | SrcImm | DstMem | ModRM, 0, + ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, + 0, 0, 0, 0, + [Group3*8] = + DstMem | SrcImm | ModRM | SrcImm, 0, + DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, + 0, 0, 0, 0, + [Group4*8] = + ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, + 0, 0, 0, 0, 0, 0, + [Group5*8] = + DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0, + SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0, + [Group7*8] = + 0, 0, ModRM | SrcMem, ModRM | SrcMem, + SrcNone | ModRM | DstMem | Mov, 0, + SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, +}; + +static u16 group2_table[] = { + [Group7*8] = + SrcNone | ModRM, 0, 0, 0, + SrcNone | ModRM | DstMem | Mov, 0, + SrcMem16 | ModRM | Mov, 0, +}; + /* EFLAGS bit definitions. */ #define EFLG_OF (1<<11) #define EFLG_DF (1<<10) @@ -317,7 +374,7 @@ static u16 twobyte_table[256] = { #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ do { \ - unsigned long _tmp; \ + unsigned long __tmp; \ switch ((_dst).bytes) { \ case 1: \ __asm__ __volatile__ ( \ @@ -325,7 +382,7 @@ static u16 twobyte_table[256] = { _op"b %"_bx"3,%1; " \ _POST_EFLAGS("0", "4", "2") \ : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ + "=&r" (__tmp) \ : _by ((_src).val), "i" (EFLAGS_MASK)); \ break; \ default: \ @@ -426,29 +483,40 @@ static u16 twobyte_table[256] = { (_type)_x; \ }) +static inline unsigned long ad_mask(struct decode_cache *c) +{ + return (1UL << (c->ad_bytes << 3)) - 1; +} + /* Access/update address held in a register, based on addressing mode. */ -#define address_mask(reg) \ - ((c->ad_bytes == sizeof(unsigned long)) ? \ - (reg) : ((reg) & ((1UL << (c->ad_bytes << 3)) - 1))) -#define register_address(base, reg) \ - ((base) + address_mask(reg)) -#define register_address_increment(reg, inc) \ - do { \ - /* signed type ensures sign extension to long */ \ - int _inc = (inc); \ - if (c->ad_bytes == sizeof(unsigned long)) \ - (reg) += _inc; \ - else \ - (reg) = ((reg) & \ - ~((1UL << (c->ad_bytes << 3)) - 1)) | \ - (((reg) + _inc) & \ - ((1UL << (c->ad_bytes << 3)) - 1)); \ - } while (0) +static inline unsigned long +address_mask(struct decode_cache *c, unsigned long reg) +{ + if (c->ad_bytes == sizeof(unsigned long)) + return reg; + else + return reg & ad_mask(c); +} -#define JMP_REL(rel) \ - do { \ - register_address_increment(c->eip, rel); \ - } while (0) +static inline unsigned long +register_address(struct decode_cache *c, unsigned long base, unsigned long reg) +{ + return base + address_mask(c, reg); +} + +static inline void +register_address_increment(struct decode_cache *c, unsigned long *reg, int inc) +{ + if (c->ad_bytes == sizeof(unsigned long)) + *reg += inc; + else + *reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c)); +} + +static inline void jmp_rel(struct decode_cache *c, int rel) +{ + register_address_increment(c, &c->eip, rel); +} static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, @@ -763,7 +831,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) struct decode_cache *c = &ctxt->decode; int rc = 0; int mode = ctxt->mode; - int def_op_bytes, def_ad_bytes; + int def_op_bytes, def_ad_bytes, group; /* Shadow copy of register state. Committed on successful emulation. */ @@ -864,12 +932,24 @@ done_prefixes: c->b = insn_fetch(u8, 1, c->eip); c->d = twobyte_table[c->b]; } + } - /* Unrecognised? */ - if (c->d == 0) { - DPRINTF("Cannot emulate %02x\n", c->b); - return -1; - } + if (c->d & Group) { + group = c->d & GroupMask; + c->modrm = insn_fetch(u8, 1, c->eip); + --c->eip; + + group = (group << 3) + ((c->modrm >> 3) & 7); + if ((c->d & GroupDual) && (c->modrm >> 6) == 3) + c->d = group2_table[group]; + else + c->d = group_table[group]; + } + + /* Unrecognised? */ + if (c->d == 0) { + DPRINTF("Cannot emulate %02x\n", c->b); + return -1; } if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) @@ -924,6 +1004,7 @@ done_prefixes: */ if ((c->d & ModRM) && c->modrm_mod == 3) { c->src.type = OP_REG; + c->src.val = c->modrm_val; break; } c->src.type = OP_MEM; @@ -967,6 +1048,7 @@ done_prefixes: case DstMem: if ((c->d & ModRM) && c->modrm_mod == 3) { c->dst.type = OP_REG; + c->dst.val = c->dst.orig_val = c->modrm_val; break; } c->dst.type = OP_MEM; @@ -984,8 +1066,8 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) c->dst.type = OP_MEM; c->dst.bytes = c->op_bytes; c->dst.val = c->src.val; - register_address_increment(c->regs[VCPU_REGS_RSP], -c->op_bytes); - c->dst.ptr = (void *) register_address(ctxt->ss_base, + register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); + c->dst.ptr = (void *) register_address(c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]); } @@ -995,13 +1077,13 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; int rc; - rc = ops->read_std(register_address(ctxt->ss_base, + rc = ops->read_std(register_address(c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]), &c->dst.val, c->dst.bytes, ctxt->vcpu); if (rc != 0) return rc; - register_address_increment(c->regs[VCPU_REGS_RSP], c->dst.bytes); + register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); return 0; } @@ -1043,26 +1125,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, switch (c->modrm_reg) { case 0 ... 1: /* test */ - /* - * Special case in Grp3: test has an immediate - * source operand. - */ - c->src.type = OP_IMM; - c->src.ptr = (unsigned long *)c->eip; - c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - if (c->src.bytes == 8) - c->src.bytes = 4; - switch (c->src.bytes) { - case 1: - c->src.val = insn_fetch(s8, 1, c->eip); - break; - case 2: - c->src.val = insn_fetch(s16, 2, c->eip); - break; - case 4: - c->src.val = insn_fetch(s32, 4, c->eip); - break; - } emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); break; case 2: /* not */ @@ -1076,7 +1138,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, rc = X86EMUL_UNHANDLEABLE; break; } -done: return rc; } @@ -1084,7 +1145,6 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - int rc; switch (c->modrm_reg) { case 0: /* inc */ @@ -1094,36 +1154,11 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, emulate_1op("dec", c->dst, ctxt->eflags); break; case 4: /* jmp abs */ - if (c->b == 0xff) - c->eip = c->dst.val; - else { - DPRINTF("Cannot emulate %02x\n", c->b); - return X86EMUL_UNHANDLEABLE; - } + c->eip = c->src.val; break; case 6: /* push */ - - /* 64-bit mode: PUSH always pushes a 64-bit operand. */ - - if (ctxt->mode == X86EMUL_MODE_PROT64) { - c->dst.bytes = 8; - rc = ops->read_std((unsigned long)c->dst.ptr, - &c->dst.val, 8, ctxt->vcpu); - if (rc != 0) - return rc; - } - register_address_increment(c->regs[VCPU_REGS_RSP], - -c->dst.bytes); - rc = ops->write_emulated(register_address(ctxt->ss_base, - c->regs[VCPU_REGS_RSP]), &c->dst.val, - c->dst.bytes, ctxt->vcpu); - if (rc != 0) - return rc; - c->dst.type = OP_NONE; + emulate_push(ctxt); break; - default: - DPRINTF("Cannot emulate %02x\n", c->b); - return X86EMUL_UNHANDLEABLE; } return 0; } @@ -1361,19 +1396,19 @@ special_insn: c->dst.type = OP_MEM; c->dst.bytes = c->op_bytes; c->dst.val = c->src.val; - register_address_increment(c->regs[VCPU_REGS_RSP], + register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); c->dst.ptr = (void *) register_address( - ctxt->ss_base, c->regs[VCPU_REGS_RSP]); + c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]); break; case 0x58 ... 0x5f: /* pop reg */ pop_instruction: - if ((rc = ops->read_std(register_address(ctxt->ss_base, + if ((rc = ops->read_std(register_address(c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]), c->dst.ptr, c->op_bytes, ctxt->vcpu)) != 0) goto done; - register_address_increment(c->regs[VCPU_REGS_RSP], + register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->op_bytes); c->dst.type = OP_NONE; /* Disable writeback. */ break; @@ -1393,9 +1428,9 @@ special_insn: 1, (c->d & ByteOp) ? 1 : c->op_bytes, c->rep_prefix ? - address_mask(c->regs[VCPU_REGS_RCX]) : 1, + address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, (ctxt->eflags & EFLG_DF), - register_address(ctxt->es_base, + register_address(c, ctxt->es_base, c->regs[VCPU_REGS_RDI]), c->rep_prefix, c->regs[VCPU_REGS_RDX]) == 0) { @@ -1409,9 +1444,9 @@ special_insn: 0, (c->d & ByteOp) ? 1 : c->op_bytes, c->rep_prefix ? - address_mask(c->regs[VCPU_REGS_RCX]) : 1, + address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, (ctxt->eflags & EFLG_DF), - register_address(c->override_base ? + register_address(c, c->override_base ? *c->override_base : ctxt->ds_base, c->regs[VCPU_REGS_RSI]), @@ -1425,7 +1460,7 @@ special_insn: int rel = insn_fetch(s8, 1, c->eip); if (test_cc(c->b, ctxt->eflags)) - JMP_REL(rel); + jmp_rel(c, rel); break; } case 0x80 ... 0x83: /* Grp1 */ @@ -1477,7 +1512,7 @@ special_insn: case 0x88 ... 0x8b: /* mov */ goto mov; case 0x8d: /* lea r16/r32, m */ - c->dst.val = c->modrm_val; + c->dst.val = c->modrm_ea; break; case 0x8f: /* pop (sole member of Grp1a) */ rc = emulate_grp1a(ctxt, ops); @@ -1501,27 +1536,27 @@ special_insn: case 0xa4 ... 0xa5: /* movs */ c->dst.type = OP_MEM; c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->dst.ptr = (unsigned long *)register_address( + c->dst.ptr = (unsigned long *)register_address(c, ctxt->es_base, c->regs[VCPU_REGS_RDI]); - if ((rc = ops->read_emulated(register_address( + if ((rc = ops->read_emulated(register_address(c, c->override_base ? *c->override_base : ctxt->ds_base, c->regs[VCPU_REGS_RSI]), &c->dst.val, c->dst.bytes, ctxt->vcpu)) != 0) goto done; - register_address_increment(c->regs[VCPU_REGS_RSI], + register_address_increment(c, &c->regs[VCPU_REGS_RSI], (ctxt->eflags & EFLG_DF) ? -c->dst.bytes : c->dst.bytes); - register_address_increment(c->regs[VCPU_REGS_RDI], + register_address_increment(c, &c->regs[VCPU_REGS_RDI], (ctxt->eflags & EFLG_DF) ? -c->dst.bytes : c->dst.bytes); break; case 0xa6 ... 0xa7: /* cmps */ c->src.type = OP_NONE; /* Disable writeback. */ c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->src.ptr = (unsigned long *)register_address( + c->src.ptr = (unsigned long *)register_address(c, c->override_base ? *c->override_base : ctxt->ds_base, c->regs[VCPU_REGS_RSI]); @@ -1533,7 +1568,7 @@ special_insn: c->dst.type = OP_NONE; /* Disable writeback. */ c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->dst.ptr = (unsigned long *)register_address( + c->dst.ptr = (unsigned long *)register_address(c, ctxt->es_base, c->regs[VCPU_REGS_RDI]); if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, @@ -1546,10 +1581,10 @@ special_insn: emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); - register_address_increment(c->regs[VCPU_REGS_RSI], + register_address_increment(c, &c->regs[VCPU_REGS_RSI], (ctxt->eflags & EFLG_DF) ? -c->src.bytes : c->src.bytes); - register_address_increment(c->regs[VCPU_REGS_RDI], + register_address_increment(c, &c->regs[VCPU_REGS_RDI], (ctxt->eflags & EFLG_DF) ? -c->dst.bytes : c->dst.bytes); @@ -1557,11 +1592,11 @@ special_insn: case 0xaa ... 0xab: /* stos */ c->dst.type = OP_MEM; c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->dst.ptr = (unsigned long *)register_address( + c->dst.ptr = (unsigned long *)register_address(c, ctxt->es_base, c->regs[VCPU_REGS_RDI]); c->dst.val = c->regs[VCPU_REGS_RAX]; - register_address_increment(c->regs[VCPU_REGS_RDI], + register_address_increment(c, &c->regs[VCPU_REGS_RDI], (ctxt->eflags & EFLG_DF) ? -c->dst.bytes : c->dst.bytes); break; @@ -1569,7 +1604,7 @@ special_insn: c->dst.type = OP_REG; c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; - if ((rc = ops->read_emulated(register_address( + if ((rc = ops->read_emulated(register_address(c, c->override_base ? *c->override_base : ctxt->ds_base, c->regs[VCPU_REGS_RSI]), @@ -1577,7 +1612,7 @@ special_insn: c->dst.bytes, ctxt->vcpu)) != 0) goto done; - register_address_increment(c->regs[VCPU_REGS_RSI], + register_address_increment(c, &c->regs[VCPU_REGS_RSI], (ctxt->eflags & EFLG_DF) ? -c->dst.bytes : c->dst.bytes); break; @@ -1616,14 +1651,14 @@ special_insn: goto cannot_emulate; } c->src.val = (unsigned long) c->eip; - JMP_REL(rel); + jmp_rel(c, rel); c->op_bytes = c->ad_bytes; emulate_push(ctxt); break; } case 0xe9: /* jmp rel */ case 0xeb: /* jmp rel short */ - JMP_REL(c->src.val); + jmp_rel(c, c->src.val); c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf4: /* hlt */ @@ -1690,6 +1725,8 @@ twobyte_insn: goto done; kvm_emulate_hypercall(ctxt->vcpu); + /* Disable writeback. */ + c->dst.type = OP_NONE; break; case 2: /* lgdt */ rc = read_descriptor(ctxt, ops, c->src.ptr, @@ -1697,6 +1734,8 @@ twobyte_insn: if (rc) goto done; realmode_lgdt(ctxt->vcpu, size, address); + /* Disable writeback. */ + c->dst.type = OP_NONE; break; case 3: /* lidt/vmmcall */ if (c->modrm_mod == 3 && c->modrm_rm == 1) { @@ -1712,27 +1751,25 @@ twobyte_insn: goto done; realmode_lidt(ctxt->vcpu, size, address); } + /* Disable writeback. */ + c->dst.type = OP_NONE; break; case 4: /* smsw */ - if (c->modrm_mod != 3) - goto cannot_emulate; - *(u16 *)&c->regs[c->modrm_rm] - = realmode_get_cr(ctxt->vcpu, 0); + c->dst.bytes = 2; + c->dst.val = realmode_get_cr(ctxt->vcpu, 0); break; case 6: /* lmsw */ - if (c->modrm_mod != 3) - goto cannot_emulate; - realmode_lmsw(ctxt->vcpu, (u16)c->modrm_val, - &ctxt->eflags); + realmode_lmsw(ctxt->vcpu, (u16)c->src.val, + &ctxt->eflags); break; case 7: /* invlpg*/ emulate_invlpg(ctxt->vcpu, memop); + /* Disable writeback. */ + c->dst.type = OP_NONE; break; default: goto cannot_emulate; } - /* Disable writeback. */ - c->dst.type = OP_NONE; break; case 0x06: emulate_clts(ctxt->vcpu); @@ -1823,7 +1860,7 @@ twobyte_insn: goto cannot_emulate; } if (test_cc(c->b, ctxt->eflags)) - JMP_REL(rel); + jmp_rel(c, rel); c->dst.type = OP_NONE; break; } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 25df1c1989f..76f60f52a88 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -11,7 +11,7 @@ lib-y += memcpy_$(BITS).o ifeq ($(CONFIG_X86_32),y) lib-y += checksum_32.o lib-y += strstr_32.o - lib-y += bitops_32.o semaphore_32.o string_32.o + lib-y += semaphore_32.o string_32.o lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else @@ -21,7 +21,6 @@ else lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += thunk_64.o clear_page_64.o copy_page_64.o - lib-y += bitops_64.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o endif diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c deleted file mode 100644 index b6544045985..00000000000 --- a/arch/x86/lib/bitops_32.c +++ /dev/null @@ -1,70 +0,0 @@ -#include <linux/bitops.h> -#include <linux/module.h> - -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -int find_next_bit(const unsigned long *addr, int size, int offset) -{ - const unsigned long *p = addr + (offset >> 5); - int set = 0, bit = offset & 31, res; - - if (bit) { - /* - * Look for nonzero in the first 32 bits: - */ - __asm__("bsfl %1,%0\n\t" - "jne 1f\n\t" - "movl $32, %0\n" - "1:" - : "=r" (set) - : "r" (*p >> bit)); - if (set < (32 - bit)) - return set + offset; - set = 32 - bit; - p++; - } - /* - * No set bit yet, search remaining full words for a bit - */ - res = find_first_bit (p, size - 32 * (p - addr)); - return (offset + set + res); -} -EXPORT_SYMBOL(find_next_bit); - -/** - * find_next_zero_bit - find the first zero bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -int find_next_zero_bit(const unsigned long *addr, int size, int offset) -{ - const unsigned long *p = addr + (offset >> 5); - int set = 0, bit = offset & 31, res; - - if (bit) { - /* - * Look for zero in the first 32 bits. - */ - __asm__("bsfl %1,%0\n\t" - "jne 1f\n\t" - "movl $32, %0\n" - "1:" - : "=r" (set) - : "r" (~(*p >> bit))); - if (set < (32 - bit)) - return set + offset; - set = 32 - bit; - p++; - } - /* - * No zero yet, search remaining full bytes for a zero - */ - res = find_first_zero_bit(p, size - 32 * (p - addr)); - return (offset + set + res); -} -EXPORT_SYMBOL(find_next_zero_bit); diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c deleted file mode 100644 index 0e8f491e6cc..00000000000 --- a/arch/x86/lib/bitops_64.c +++ /dev/null @@ -1,175 +0,0 @@ -#include <linux/bitops.h> - -#undef find_first_zero_bit -#undef find_next_zero_bit -#undef find_first_bit -#undef find_next_bit - -static inline long -__find_first_zero_bit(const unsigned long * addr, unsigned long size) -{ - long d0, d1, d2; - long res; - - /* - * We must test the size in words, not in bits, because - * otherwise incoming sizes in the range -63..-1 will not run - * any scasq instructions, and then the flags used by the je - * instruction will have whatever random value was in place - * before. Nobody should call us like that, but - * find_next_zero_bit() does when offset and size are at the - * same word and it fails to find a zero itself. - */ - size += 63; - size >>= 6; - if (!size) - return 0; - asm volatile( - " repe; scasq\n" - " je 1f\n" - " xorq -8(%%rdi),%%rax\n" - " subq $8,%%rdi\n" - " bsfq %%rax,%%rdx\n" - "1: subq %[addr],%%rdi\n" - " shlq $3,%%rdi\n" - " addq %%rdi,%%rdx" - :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) - :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL), - [addr] "S" (addr) : "memory"); - /* - * Any register would do for [addr] above, but GCC tends to - * prefer rbx over rsi, even though rsi is readily available - * and doesn't have to be saved. - */ - return res; -} - -/** - * find_first_zero_bit - find the first zero bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search - * - * Returns the bit-number of the first zero bit, not the number of the byte - * containing a bit. - */ -long find_first_zero_bit(const unsigned long * addr, unsigned long size) -{ - return __find_first_zero_bit (addr, size); -} - -/** - * find_next_zero_bit - find the next zero bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -long find_next_zero_bit (const unsigned long * addr, long size, long offset) -{ - const unsigned long * p = addr + (offset >> 6); - unsigned long set = 0; - unsigned long res, bit = offset&63; - - if (bit) { - /* - * Look for zero in first word - */ - asm("bsfq %1,%0\n\t" - "cmoveq %2,%0" - : "=r" (set) - : "r" (~(*p >> bit)), "r"(64L)); - if (set < (64 - bit)) - return set + offset; - set = 64 - bit; - p++; - } - /* - * No zero yet, search remaining full words for a zero - */ - res = __find_first_zero_bit (p, size - 64 * (p - addr)); - - return (offset + set + res); -} - -static inline long -__find_first_bit(const unsigned long * addr, unsigned long size) -{ - long d0, d1; - long res; - - /* - * We must test the size in words, not in bits, because - * otherwise incoming sizes in the range -63..-1 will not run - * any scasq instructions, and then the flags used by the jz - * instruction will have whatever random value was in place - * before. Nobody should call us like that, but - * find_next_bit() does when offset and size are at the same - * word and it fails to find a one itself. - */ - size += 63; - size >>= 6; - if (!size) - return 0; - asm volatile( - " repe; scasq\n" - " jz 1f\n" - " subq $8,%%rdi\n" - " bsfq (%%rdi),%%rax\n" - "1: subq %[addr],%%rdi\n" - " shlq $3,%%rdi\n" - " addq %%rdi,%%rax" - :"=a" (res), "=&c" (d0), "=&D" (d1) - :"0" (0ULL), "1" (size), "2" (addr), - [addr] "r" (addr) : "memory"); - return res; -} - -/** - * find_first_bit - find the first set bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search - * - * Returns the bit-number of the first set bit, not the number of the byte - * containing a bit. - */ -long find_first_bit(const unsigned long * addr, unsigned long size) -{ - return __find_first_bit(addr,size); -} - -/** - * find_next_bit - find the first set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -long find_next_bit(const unsigned long * addr, long size, long offset) -{ - const unsigned long * p = addr + (offset >> 6); - unsigned long set = 0, bit = offset & 63, res; - - if (bit) { - /* - * Look for nonzero in the first 64 bits: - */ - asm("bsfq %1,%0\n\t" - "cmoveq %2,%0\n\t" - : "=r" (set) - : "r" (*p >> bit), "r" (64L)); - if (set < (64 - bit)) - return set + offset; - set = 64 - bit; - p++; - } - /* - * No set bit yet, search remaining full words for a bit - */ - res = __find_first_bit (p, size - 64 * (p - addr)); - return (offset + set + res); -} - -#include <linux/module.h> - -EXPORT_SYMBOL(find_next_bit); -EXPORT_SYMBOL(find_first_bit); -EXPORT_SYMBOL(find_first_zero_bit); -EXPORT_SYMBOL(find_next_zero_bit); diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index d05722121d2..8acbf0cdf1a 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -113,7 +113,7 @@ static inline void send_QIC_CPI(__u32 cpuset, __u8 cpi) for_each_online_cpu(cpu) { if (cpuset & (1 << cpu)) { #ifdef VOYAGER_DEBUG - if (!cpu_isset(cpu, cpu_online_map)) + if (!cpu_online(cpu)) VDEBUG(("CPU%d sending cpi %d to CPU%d not in " "cpu_online_map\n", hard_smp_processor_id(), cpi, cpu)); @@ -543,8 +543,8 @@ static void __init do_boot_cpu(__u8 cpu) hijack_source.idt.Offset, stack_start.sp)); /* init lowmem identity mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); flush_tlb_all(); if (quad_boot) { @@ -683,9 +683,9 @@ void __init smp_boot_cpus(void) * Code added from smpboot.c */ { unsigned long bogosum = 0; - for (i = 0; i < NR_CPUS; i++) - if (cpu_isset(i, cpu_online_map)) - bogosum += cpu_data(i).loops_per_jiffy; + + for_each_online_cpu(i) + bogosum += cpu_data(i).loops_per_jiffy; printk(KERN_INFO "Total of %d processors activated " "(%lu.%02lu BogoMIPS).\n", cpucount + 1, bogosum / (500000 / HZ), @@ -1838,7 +1838,7 @@ static int __cpuinit voyager_cpu_up(unsigned int cpu) return -EIO; /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); - while (!cpu_isset(cpu, cpu_online_map)) + while (!cpu_online(cpu)) mb(); return 0; } diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 20941d2954e..b7b3e4c7cfc 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,5 +1,5 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o + pat.o pgtable.o obj-$(CONFIG_X86_32) += pgtable_32.o diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9ec62da85fd..de236e419cb 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -71,7 +71,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); + paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); pud = pud_offset(pgd, 0); BUG_ON(pmd_table != pmd_offset(pud, 0)); @@ -100,7 +100,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); } - paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); + paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } @@ -227,6 +227,25 @@ static inline int page_kills_ppro(unsigned long pagenr) return 0; } +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains bios code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + if (pagenr <= 256) + return 1; + if (!page_is_ram(pagenr)) + return 1; + return 0; +} + #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; pgprot_t kmap_prot; @@ -268,47 +287,17 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -static void __meminit free_new_highpage(struct page *page) -{ - init_page_count(page); - __free_page(page); - totalhigh_pages++; -} - void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) { if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { ClearPageReserved(page); - free_new_highpage(page); + init_page_count(page); + __free_page(page); + totalhigh_pages++; } else SetPageReserved(page); } -static int __meminit -add_one_highpage_hotplug(struct page *page, unsigned long pfn) -{ - free_new_highpage(page); - totalram_pages++; -#ifdef CONFIG_FLATMEM - max_mapnr = max(pfn, max_mapnr); -#endif - num_physpages++; - - return 0; -} - -/* - * Not currently handling the NUMA case. - * Assuming single node and all memory that - * has been added dynamically that would be - * onlined here is in HIGHMEM. - */ -void __meminit online_page(struct page *page) -{ - ClearPageReserved(page); - add_one_highpage_hotplug(page, page_to_pfn(page)); -} - #ifndef CONFIG_NUMA static void __init set_highmem_pages_init(int bad_ppro) { @@ -365,7 +354,7 @@ void __init native_pagetable_setup_start(pgd_t *base) pte_clear(NULL, va, pte); } - paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT); + paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); } void __init native_pagetable_setup_done(pgd_t *base) @@ -457,7 +446,7 @@ void zap_low_mappings(void) * Note that "pgd_clear()" doesn't do it for * us, because pgd_clear() is a no-op on i386. */ - for (i = 0; i < USER_PTRS_PER_PGD; i++) { + for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) { #ifdef CONFIG_X86_PAE set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); #else @@ -547,9 +536,9 @@ void __init paging_init(void) /* * Test if the WP bit works in supervisor mode. It isn't supported on 386's - * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This - * used to involve black magic jumps to work around some nasty CPU bugs, - * but fortunately the switch to using exceptions got rid of all that. + * and also on some strange 486's. All 586+'s are OK. This used to involve + * black magic jumps to work around some nasty CPU bugs, but fortunately the + * switch to using exceptions got rid of all that. */ static void __init test_wp_bit(void) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1ff7906a9a4..32ba13b0f81 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -135,7 +135,7 @@ static __init void *spp_getpage(void) return ptr; } -static __init void +static void set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) { pgd_t *pgd; @@ -173,7 +173,7 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) new_pte = pfn_pte(phys >> PAGE_SHIFT, prot); pte = pte_offset_kernel(pmd, vaddr); - if (!pte_none(*pte) && + if (!pte_none(*pte) && pte_val(new_pte) && pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask)) pte_ERROR(*pte); set_pte(pte, new_pte); @@ -214,8 +214,7 @@ void __init cleanup_highmap(void) } /* NOTE: this is meant to be run only at boot */ -void __init -__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot) +void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot) { unsigned long address = __fix_to_virt(idx); @@ -621,15 +620,6 @@ void __init paging_init(void) /* * Memory hotplug specific functions */ -void online_page(struct page *page) -{ - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalram_pages++; - num_physpages++; -} - #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory is added always to NORMAL zone. This means you will never get @@ -664,6 +654,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif /* CONFIG_MEMORY_HOTPLUG */ +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains bios code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + if (pagenr <= 256) + return 1; + if (!page_is_ram(pagenr)) + return 1; + return 0; +} + + static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; @@ -791,7 +801,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) void __init reserve_bootmem_generic(unsigned long phys, unsigned len) { #ifdef CONFIG_NUMA - int nid = phys_to_nid(phys); + int nid, next_nid; #endif unsigned long pfn = phys >> PAGE_SHIFT; @@ -810,10 +820,16 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) /* Should check here against the e820 map to avoid double free */ #ifdef CONFIG_NUMA - reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); + nid = phys_to_nid(phys); + next_nid = phys_to_nid(phys + len - 1); + if (nid == next_nid) + reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); + else + reserve_bootmem(phys, len, BOOTMEM_DEFAULT); #else reserve_bootmem(phys, len, BOOTMEM_DEFAULT); #endif + if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { dma_reserve += len / PAGE_SIZE; set_dma_reserve(dma_reserve); @@ -907,6 +923,10 @@ const char *arch_vma_name(struct vm_area_struct *vma) /* * Initialise the sparsemem vmemmap using huge-pages at the PMD level. */ +static long __meminitdata addr_start, addr_end; +static void __meminitdata *p_start, *p_end; +static int __meminitdata node_start; + int __meminit vmemmap_populate(struct page *start_page, unsigned long size, int node) { @@ -941,12 +961,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) PAGE_KERNEL_LARGE); set_pmd(pmd, __pmd(pte_val(entry))); - printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", - addr, addr + PMD_SIZE - 1, p, node); + /* check to see if we have contiguous blocks */ + if (p_end != p || node_start != node) { + if (p_start) + printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", + addr_start, addr_end-1, p_start, p_end-1, node_start); + addr_start = addr; + node_start = node; + p_start = p; + } + addr_end = addr + PMD_SIZE; + p_end = p + PMD_SIZE; } else { vmemmap_verify((pte_t *)pmd, node, addr, next); } } return 0; } + +void __meminit vmemmap_populate_print_last(void) +{ + if (p_start) { + printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", + addr_start, addr_end-1, p_start, p_end-1, node_start); + p_start = NULL; + p_end = NULL; + node_start = 0; + } +} #endif diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 3a4baf95e24..804de18abcc 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -117,8 +117,8 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, - unsigned long prot_val) +static void __iomem *__ioremap_caller(resource_size_t phys_addr, + unsigned long size, unsigned long prot_val, void *caller) { unsigned long pfn, offset, vaddr; resource_size_t last_addr; @@ -212,7 +212,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, /* * Ok, go for it.. */ - area = get_vm_area(size, VM_IOREMAP); + area = get_vm_area_caller(size, VM_IOREMAP, caller); if (!area) return NULL; area->phys_addr = phys_addr; @@ -255,7 +255,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, */ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { - return __ioremap(phys_addr, size, _PAGE_CACHE_UC); + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_UC, + __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap_nocache); @@ -272,7 +273,8 @@ EXPORT_SYMBOL(ioremap_nocache); void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) { if (pat_wc_enabled) - return __ioremap(phys_addr, size, _PAGE_CACHE_WC); + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, + __builtin_return_address(0)); else return ioremap_nocache(phys_addr, size); } @@ -280,7 +282,8 @@ EXPORT_SYMBOL(ioremap_wc); void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) { - return __ioremap(phys_addr, size, _PAGE_CACHE_WB); + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB, + __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap_cache); @@ -336,6 +339,35 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +void *xlate_dev_mem_ptr(unsigned long phys) +{ + void *addr; + unsigned long start = phys & PAGE_MASK; + + /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */ + if (page_is_ram(start >> PAGE_SHIFT)) + return __va(phys); + + addr = (void *)ioremap(start, PAGE_SIZE); + if (addr) + addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); + + return addr; +} + +void unxlate_dev_mem_ptr(unsigned long phys, void *addr) +{ + if (page_is_ram(phys >> PAGE_SHIFT)) + return; + + iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK)); + return; +} + #ifdef CONFIG_X86_32 int __initdata early_ioremap_debug; @@ -407,7 +439,7 @@ void __init early_ioremap_clear(void) pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); pmd_clear(pmd); - paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT); + paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); __flush_tlb_all(); } diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 9a6892200b2..c5066d519e5 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long bootmap_start, nodedata_phys; void *bootmap; const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); + int nid; start = round_up(start, ZONE_ALIGN); @@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, NODE_DATA(nodeid)->node_start_pfn = start_pfn; NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; - /* Find a place for the bootmem map */ + /* + * Find a place for the bootmem map + * nodedata_phys could be on other nodes by alloc_bootmem, + * so need to sure bootmap_start not to be small, otherwise + * early_node_mem will get that with find_e820_area instead + * of alloc_bootmem, that could clash with reserved range + */ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); + nid = phys_to_nid(nodedata_phys); + if (nid == nodeid) + bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); + else + bootmap_start = round_up(start, PAGE_SIZE); /* * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like * to use that to align to PAGE_SIZE @@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, free_bootmem_with_active_regions(nodeid, end); - reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, - BOOTMEM_DEFAULT); - reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, - bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); + /* + * convert early reserve to bootmem reserve earlier + * otherwise early_node_mem could use early reserved mem + * on previous node + */ + early_res_to_bootmem(start, end); + + /* + * in some case early_node_mem could use alloc_bootmem + * to get range on other node, don't reserve that again + */ + if (nid != nodeid) + printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); + else + reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, + pgdat_size, BOOTMEM_DEFAULT); + nid = phys_to_nid(bootmap_start); + if (nid != nodeid) + printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); + else + reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, + bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); + #ifdef CONFIG_ACPI_NUMA srat_reserve_add_area(nodeid); #endif diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index c29ebd03725..bd5e05c654d 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -483,9 +483,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) goto out_unlock; pbase = (pte_t *)page_address(base); -#ifdef CONFIG_X86_32 - paravirt_alloc_pt(&init_mm, page_to_pfn(base)); -#endif + paravirt_alloc_pte(&init_mm, page_to_pfn(base)); ref_prot = pte_pgprot(pte_clrhuge(*kpte)); #ifdef CONFIG_X86_64 diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 72c0f609740..277446cd30b 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -11,16 +11,19 @@ #include <linux/kernel.h> #include <linux/gfp.h> #include <linux/fs.h> +#include <linux/bootmem.h> #include <asm/msr.h> #include <asm/tlbflush.h> #include <asm/processor.h> +#include <asm/page.h> #include <asm/pgtable.h> #include <asm/pat.h> #include <asm/e820.h> #include <asm/cacheflush.h> #include <asm/fcntl.h> #include <asm/mtrr.h> +#include <asm/io.h> int pat_wc_enabled = 1; @@ -190,6 +193,21 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, return 0; } +/* + * req_type typically has one of the: + * - _PAGE_CACHE_WB + * - _PAGE_CACHE_WC + * - _PAGE_CACHE_UC_MINUS + * - _PAGE_CACHE_UC + * + * req_type will have a special case value '-1', when requester want to inherit + * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. + * + * If ret_type is NULL, function will return an error if it cannot reserve the + * region with req_type. If ret_type is non-null, function will return + * available type in ret_type in case of no error. In case of any error + * it will return a negative return value. + */ int reserve_memtype(u64 start, u64 end, unsigned long req_type, unsigned long *ret_type) { @@ -200,9 +218,14 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, /* Only track when pat_wc_enabled */ if (!pat_wc_enabled) { - if (ret_type) - *ret_type = req_type; - + /* This is identical to page table setting without PAT */ + if (ret_type) { + if (req_type == -1) { + *ret_type = _PAGE_CACHE_WB; + } else { + *ret_type = req_type; + } + } return 0; } @@ -214,8 +237,29 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return 0; } - req_type &= _PAGE_CACHE_MASK; - err = pat_x_mtrr_type(start, end, req_type, &actual_type); + if (req_type == -1) { + /* + * Special case where caller wants to inherit from mtrr or + * existing pat mapping, defaulting to UC_MINUS in case of + * no match. + */ + u8 mtrr_type = mtrr_type_lookup(start, end); + if (mtrr_type == 0xFE) { /* MTRR match error */ + err = -1; + } + + if (mtrr_type == MTRR_TYPE_WRBACK) { + req_type = _PAGE_CACHE_WB; + actual_type = _PAGE_CACHE_WB; + } else { + req_type = _PAGE_CACHE_UC_MINUS; + actual_type = _PAGE_CACHE_UC_MINUS; + } + } else { + req_type &= _PAGE_CACHE_MASK; + err = pat_x_mtrr_type(start, end, req_type, &actual_type); + } + if (err) { if (ret_type) *ret_type = actual_type; @@ -241,7 +285,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, struct memtype *saved_ptr; if (parse->start >= end) { - printk("New Entry\n"); + pr_debug("New Entry\n"); list_add(&new_entry->nd, parse->nd.prev); new_entry = NULL; break; @@ -291,7 +335,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, break; } - printk("Overlap at 0x%Lx-0x%Lx\n", + pr_debug("Overlap at 0x%Lx-0x%Lx\n", saved_ptr->start, saved_ptr->end); /* No conflict. Go ahead and add this new entry */ list_add(&new_entry->nd, saved_ptr->nd.prev); @@ -343,8 +387,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, break; } - printk("Overlap at 0x%Lx-0x%Lx\n", - saved_ptr->start, saved_ptr->end); + pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n", + saved_ptr->start, saved_ptr->end); /* No conflict. Go ahead and add this new entry */ list_add(&new_entry->nd, &saved_ptr->nd); new_entry = NULL; @@ -353,7 +397,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, } if (err) { - printk( + printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n", start, end, cattr_name(new_entry->type), cattr_name(req_type)); @@ -365,16 +409,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (new_entry) { /* No conflict. Not yet added to the list. Add to the tail */ list_add_tail(&new_entry->nd, &memtype_list); - printk("New Entry\n"); - } + pr_debug("New Entry\n"); + } if (ret_type) { - printk( + pr_debug( "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", start, end, cattr_name(actual_type), cattr_name(req_type), cattr_name(*ret_type)); } else { - printk( + pr_debug( "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n", start, end, cattr_name(actual_type), cattr_name(req_type)); @@ -411,11 +455,142 @@ int free_memtype(u64 start, u64 end) spin_unlock(&memtype_lock); if (err) { - printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n", + printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", current->comm, current->pid, start, end); } - printk( "free_memtype request 0x%Lx-0x%Lx\n", start, end); + pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end); return err; } + +/* + * /dev/mem mmap interface. The memtype used for mapping varies: + * - Use UC for mappings with O_SYNC flag + * - Without O_SYNC flag, if there is any conflict in reserve_memtype, + * inherit the memtype from existing mapping. + * - Else use UC_MINUS memtype (for backward compatibility with existing + * X drivers. + */ +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + return vma_prot; +} + +#ifdef CONFIG_NONPROMISC_DEVMEM +/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/ +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + return 1; +} +#else +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + u64 from = ((u64)pfn) << PAGE_SHIFT; + u64 to = from + size; + u64 cursor = from; + + while (cursor < to) { + if (!devmem_is_allowed(pfn)) { + printk(KERN_INFO + "Program %s tried to access /dev/mem between %Lx->%Lx.\n", + current->comm, from, to); + return 0; + } + cursor += PAGE_SIZE; + pfn++; + } + return 1; +} +#endif /* CONFIG_NONPROMISC_DEVMEM */ + +int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t *vma_prot) +{ + u64 offset = ((u64) pfn) << PAGE_SHIFT; + unsigned long flags = _PAGE_CACHE_UC_MINUS; + int retval; + + if (!range_is_allowed(pfn, size)) + return 0; + + if (file->f_flags & O_SYNC) { + flags = _PAGE_CACHE_UC; + } + +#ifdef CONFIG_X86_32 + /* + * On the PPro and successors, the MTRRs are used to set + * memory types for physical addresses outside main memory, + * so blindly setting UC or PWT on those pages is wrong. + * For Pentiums and earlier, the surround logic should disable + * caching for the high addresses through the KEN pin, but + * we maintain the tradition of paranoia in this code. + */ + if (!pat_wc_enabled && + ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) && + (pfn << PAGE_SHIFT) >= __pa(high_memory)) { + flags = _PAGE_CACHE_UC; + } +#endif + + /* + * With O_SYNC, we can only take UC mapping. Fail if we cannot. + * Without O_SYNC, we want to get + * - WB for WB-able memory and no other conflicting mappings + * - UC_MINUS for non-WB-able memory with no other conflicting mappings + * - Inherit from confliting mappings otherwise + */ + if (flags != _PAGE_CACHE_UC_MINUS) { + retval = reserve_memtype(offset, offset + size, flags, NULL); + } else { + retval = reserve_memtype(offset, offset + size, -1, &flags); + } + + if (retval < 0) + return 0; + + if (pfn <= max_pfn_mapped && + ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { + free_memtype(offset, offset + size); + printk(KERN_INFO + "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", + current->comm, current->pid, + cattr_name(flags), + offset, offset + size); + return 0; + } + + *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | + flags); + return 1; +} + +void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) +{ + u64 addr = (u64)pfn << PAGE_SHIFT; + unsigned long flags; + unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); + + reserve_memtype(addr, addr + size, want_flags, &flags); + if (flags != want_flags) { + printk(KERN_INFO + "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n", + current->comm, current->pid, + cattr_name(want_flags), + addr, addr + size, + cattr_name(flags)); + } +} + +void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) +{ + u64 addr = (u64)pfn << PAGE_SHIFT; + + free_memtype(addr, addr + size); +} + diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c new file mode 100644 index 00000000000..50159764f69 --- /dev/null +++ b/arch/x86/mm/pgtable.c @@ -0,0 +1,276 @@ +#include <linux/mm.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/tlb.h> + +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +{ + return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); +} + +pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *pte; + +#ifdef CONFIG_HIGHPTE + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); +#else + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); +#endif + if (pte) + pgtable_page_ctor(pte); + return pte; +} + +void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) +{ + pgtable_page_dtor(pte); + paravirt_release_pte(page_to_pfn(pte)); + tlb_remove_page(tlb, pte); +} + +#if PAGETABLE_LEVELS > 2 +void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) +{ + paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); + tlb_remove_page(tlb, virt_to_page(pmd)); +} + +#if PAGETABLE_LEVELS > 3 +void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) +{ + paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); + tlb_remove_page(tlb, virt_to_page(pud)); +} +#endif /* PAGETABLE_LEVELS > 3 */ +#endif /* PAGETABLE_LEVELS > 2 */ + +static inline void pgd_list_add(pgd_t *pgd) +{ + struct page *page = virt_to_page(pgd); + + list_add(&page->lru, &pgd_list); +} + +static inline void pgd_list_del(pgd_t *pgd) +{ + struct page *page = virt_to_page(pgd); + + list_del(&page->lru); +} + +#define UNSHARED_PTRS_PER_PGD \ + (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) + +static void pgd_ctor(void *p) +{ + pgd_t *pgd = p; + unsigned long flags; + + /* Clear usermode parts of PGD */ + memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t)); + + spin_lock_irqsave(&pgd_lock, flags); + + /* If the pgd points to a shared pagetable level (either the + ptes in non-PAE, or shared PMD in PAE), then just copy the + references from swapper_pg_dir. */ + if (PAGETABLE_LEVELS == 2 || + (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || + PAGETABLE_LEVELS == 4) { + clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); + paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT, + __pa(swapper_pg_dir) >> PAGE_SHIFT, + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); + } + + /* list required to sync kernel mapping updates */ + if (!SHARED_KERNEL_PMD) + pgd_list_add(pgd); + + spin_unlock_irqrestore(&pgd_lock, flags); +} + +static void pgd_dtor(void *pgd) +{ + unsigned long flags; /* can be called from interrupt context */ + + if (SHARED_KERNEL_PMD) + return; + + spin_lock_irqsave(&pgd_lock, flags); + pgd_list_del(pgd); + spin_unlock_irqrestore(&pgd_lock, flags); +} + +/* + * List of all pgd's needed for non-PAE so it can invalidate entries + * in both cached and uncached pgd's; not needed for PAE since the + * kernel pmd is shared. If PAE were not to share the pmd a similar + * tactic would be needed. This is essentially codepath-based locking + * against pageattr.c; it is the unique case in which a valid change + * of kernel pagetables can't be lazily synchronized by vmalloc faults. + * vmalloc faults work because attached pagetables are never freed. + * -- wli + */ + +#ifdef CONFIG_X86_PAE +/* + * Mop up any pmd pages which may still be attached to the pgd. + * Normally they will be freed by munmap/exit_mmap, but any pmd we + * preallocate which never got a corresponding vma will need to be + * freed manually. + */ +static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) +{ + int i; + + for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) { + pgd_t pgd = pgdp[i]; + + if (pgd_val(pgd) != 0) { + pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); + + pgdp[i] = native_make_pgd(0); + + paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); + pmd_free(mm, pmd); + } + } +} + +/* + * In PAE mode, we need to do a cr3 reload (=tlb flush) when + * updating the top-level pagetable entries to guarantee the + * processor notices the update. Since this is expensive, and + * all 4 top-level entries are used almost immediately in a + * new process's life, we just pre-populate them here. + * + * Also, if we're in a paravirt environment where the kernel pmd is + * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate + * and initialize the kernel pmds here. + */ +static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) +{ + pud_t *pud; + unsigned long addr; + int i; + + pud = pud_offset(pgd, 0); + for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; + i++, pud++, addr += PUD_SIZE) { + pmd_t *pmd = pmd_alloc_one(mm, addr); + + if (!pmd) { + pgd_mop_up_pmds(mm, pgd); + return 0; + } + + if (i >= KERNEL_PGD_BOUNDARY) + memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), + sizeof(pmd_t) * PTRS_PER_PMD); + + pud_populate(mm, pud, pmd); + } + + return 1; +} + +void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) +{ + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); + + /* Note: almost everything apart from _PAGE_PRESENT is + reserved at the pmd (PDPT) level. */ + set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); + + /* + * According to Intel App note "TLBs, Paging-Structure Caches, + * and Their Invalidation", April 2007, document 317080-001, + * section 8.1: in PAE mode we explicitly have to flush the + * TLB via cr3 if the top-level pgd is changed... + */ + if (mm == current->active_mm) + write_cr3(read_cr3()); +} +#else /* !CONFIG_X86_PAE */ +/* No need to prepopulate any pagetable entries in non-PAE modes. */ +static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) +{ + return 1; +} + +static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd) +{ +} +#endif /* CONFIG_X86_PAE */ + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + + /* so that alloc_pmd can use it */ + mm->pgd = pgd; + if (pgd) + pgd_ctor(pgd); + + if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { + pgd_dtor(pgd); + free_page((unsigned long)pgd); + pgd = NULL; + } + + return pgd; +} + +void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + pgd_mop_up_pmds(mm, pgd); + pgd_dtor(pgd); + free_page((unsigned long)pgd); +} + +int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + int changed = !pte_same(*ptep, entry); + + if (changed && dirty) { + *ptep = entry; + pte_update_defer(vma->vm_mm, address, ptep); + flush_tlb_page(vma, address); + } + + return changed; +} + +int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + int ret = 0; + + if (pte_young(*ptep)) + ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, + &ptep->pte); + + if (ret) + pte_update(vma->vm_mm, addr, ptep); + + return ret; +} + +int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + int young; + + young = ptep_test_and_clear_young(vma, address, ptep); + if (young) + flush_tlb_page(vma, address); + + return young; +} diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 6fb9e7c6893..9ee007be914 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -173,210 +173,6 @@ void reserve_top_address(unsigned long reserve) __VMALLOC_RESERVE += reserve; } -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); -} - -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - struct page *pte; - -#ifdef CONFIG_HIGHPTE - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); -#else - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); -#endif - if (pte) - pgtable_page_ctor(pte); - return pte; -} - -/* - * List of all pgd's needed for non-PAE so it can invalidate entries - * in both cached and uncached pgd's; not needed for PAE since the - * kernel pmd is shared. If PAE were not to share the pmd a similar - * tactic would be needed. This is essentially codepath-based locking - * against pageattr.c; it is the unique case in which a valid change - * of kernel pagetables can't be lazily synchronized by vmalloc faults. - * vmalloc faults work because attached pagetables are never freed. - * -- wli - */ -static inline void pgd_list_add(pgd_t *pgd) -{ - struct page *page = virt_to_page(pgd); - - list_add(&page->lru, &pgd_list); -} - -static inline void pgd_list_del(pgd_t *pgd) -{ - struct page *page = virt_to_page(pgd); - - list_del(&page->lru); -} - -#define UNSHARED_PTRS_PER_PGD \ - (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) - -static void pgd_ctor(void *p) -{ - pgd_t *pgd = p; - unsigned long flags; - - /* Clear usermode parts of PGD */ - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); - - spin_lock_irqsave(&pgd_lock, flags); - - /* If the pgd points to a shared pagetable level (either the - ptes in non-PAE, or shared PMD in PAE), then just copy the - references from swapper_pg_dir. */ - if (PAGETABLE_LEVELS == 2 || - (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) { - clone_pgd_range(pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, - KERNEL_PGD_PTRS); - paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, - __pa(swapper_pg_dir) >> PAGE_SHIFT, - USER_PTRS_PER_PGD, - KERNEL_PGD_PTRS); - } - - /* list required to sync kernel mapping updates */ - if (!SHARED_KERNEL_PMD) - pgd_list_add(pgd); - - spin_unlock_irqrestore(&pgd_lock, flags); -} - -static void pgd_dtor(void *pgd) -{ - unsigned long flags; /* can be called from interrupt context */ - - if (SHARED_KERNEL_PMD) - return; - - spin_lock_irqsave(&pgd_lock, flags); - pgd_list_del(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); -} - -#ifdef CONFIG_X86_PAE -/* - * Mop up any pmd pages which may still be attached to the pgd. - * Normally they will be freed by munmap/exit_mmap, but any pmd we - * preallocate which never got a corresponding vma will need to be - * freed manually. - */ -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) -{ - int i; - - for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) { - pgd_t pgd = pgdp[i]; - - if (pgd_val(pgd) != 0) { - pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); - - pgdp[i] = native_make_pgd(0); - - paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT); - pmd_free(mm, pmd); - } - } -} - -/* - * In PAE mode, we need to do a cr3 reload (=tlb flush) when - * updating the top-level pagetable entries to guarantee the - * processor notices the update. Since this is expensive, and - * all 4 top-level entries are used almost immediately in a - * new process's life, we just pre-populate them here. - * - * Also, if we're in a paravirt environment where the kernel pmd is - * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate - * and initialize the kernel pmds here. - */ -static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) -{ - pud_t *pud; - unsigned long addr; - int i; - - pud = pud_offset(pgd, 0); - for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; - i++, pud++, addr += PUD_SIZE) { - pmd_t *pmd = pmd_alloc_one(mm, addr); - - if (!pmd) { - pgd_mop_up_pmds(mm, pgd); - return 0; - } - - if (i >= USER_PTRS_PER_PGD) - memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), - sizeof(pmd_t) * PTRS_PER_PMD); - - pud_populate(mm, pud, pmd); - } - - return 1; -} -#else /* !CONFIG_X86_PAE */ -/* No need to prepopulate any pagetable entries in non-PAE modes. */ -static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) -{ - return 1; -} - -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) -{ -} -#endif /* CONFIG_X86_PAE */ - -pgd_t *pgd_alloc(struct mm_struct *mm) -{ - pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - - /* so that alloc_pd can use it */ - mm->pgd = pgd; - if (pgd) - pgd_ctor(pgd); - - if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { - pgd_dtor(pgd); - free_page((unsigned long)pgd); - pgd = NULL; - } - - return pgd; -} - -void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - pgd_mop_up_pmds(mm, pgd); - pgd_dtor(pgd); - free_page((unsigned long)pgd); -} - -void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) -{ - pgtable_page_dtor(pte); - paravirt_release_pt(page_to_pfn(pte)); - tlb_remove_page(tlb, pte); -} - -#ifdef CONFIG_X86_PAE - -void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) -{ - paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); - tlb_remove_page(tlb, virt_to_page(pmd)); -} - -#endif - int pmd_bad(pmd_t pmd) { WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S index 4b1620a1529..1d3aa6b8718 100644 --- a/arch/x86/vdso/vdso.S +++ b/arch/x86/vdso/vdso.S @@ -1,2 +1,10 @@ - .section ".vdso","a" +#include <linux/init.h> + +__INITDATA + + .globl vdso_start, vdso_end +vdso_start: .incbin "arch/x86/vdso/vdso.so" +vdso_end: + +__FINIT diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 4d5f2649bee..2e641be2737 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -6,7 +6,7 @@ config XEN bool "Xen guest support" select PARAVIRT depends on X86_32 - depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && !(X86_VISWS || X86_VOYAGER) + depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER) help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 343df246bd3..3d8df981d5f 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ -obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \ - events.o time.o manage.o xen-asm.o +obj-y := enlighten.o setup.o multicalls.o mmu.o \ + time.o manage.o xen-asm.o grant-table.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c0388220cf9..c8a56e457d6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -155,7 +155,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, if (*ax == 1) maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ (1 << X86_FEATURE_ACPI) | /* disable ACPI */ - (1 << X86_FEATURE_SEP) | /* disable SEP */ + (1 << X86_FEATURE_MCE) | /* disable MCE */ + (1 << X86_FEATURE_MCA) | /* disable MCA */ (1 << X86_FEATURE_ACC)); /* thermal monitoring */ asm(XEN_EMULATE_PREFIX "cpuid" @@ -531,26 +532,37 @@ static void xen_apic_write(unsigned long reg, u32 val) static void xen_flush_tlb(void) { struct mmuext_op *op; - struct multicall_space mcs = xen_mc_entry(sizeof(*op)); + struct multicall_space mcs; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); op = mcs.args; op->cmd = MMUEXT_TLB_FLUSH_LOCAL; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); } static void xen_flush_tlb_single(unsigned long addr) { struct mmuext_op *op; - struct multicall_space mcs = xen_mc_entry(sizeof(*op)); + struct multicall_space mcs; + + preempt_disable(); + mcs = xen_mc_entry(sizeof(*op)); op = mcs.args; op->cmd = MMUEXT_INVLPG_LOCAL; op->arg1.linear_addr = addr & PAGE_MASK; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); } static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, @@ -655,15 +667,17 @@ static void xen_write_cr3(unsigned long cr3) /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ -static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn) +static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn) { +#ifdef CONFIG_FLATMEM BUG_ON(mem_map); /* should only be used early */ +#endif make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); } -/* Early release_pt assumes that all pts are pinned, since there's +/* Early release_pte assumes that all pts are pinned, since there's only init_mm and anything attached to that is pinned. */ -static void xen_release_pt_init(u32 pfn) +static void xen_release_pte_init(u32 pfn) { make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } @@ -697,12 +711,12 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) } } -static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) +static void xen_alloc_pte(struct mm_struct *mm, u32 pfn) { xen_alloc_ptpage(mm, pfn, PT_PTE); } -static void xen_alloc_pd(struct mm_struct *mm, u32 pfn) +static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn) { xen_alloc_ptpage(mm, pfn, PT_PMD); } @@ -722,12 +736,12 @@ static void xen_release_ptpage(u32 pfn, unsigned level) } } -static void xen_release_pt(u32 pfn) +static void xen_release_pte(u32 pfn) { xen_release_ptpage(pfn, PT_PTE); } -static void xen_release_pd(u32 pfn) +static void xen_release_pmd(u32 pfn) { xen_release_ptpage(pfn, PT_PMD); } @@ -849,10 +863,10 @@ static __init void xen_pagetable_setup_done(pgd_t *base) { /* This will work as long as patching hasn't happened yet (which it hasn't) */ - pv_mmu_ops.alloc_pt = xen_alloc_pt; - pv_mmu_ops.alloc_pd = xen_alloc_pd; - pv_mmu_ops.release_pt = xen_release_pt; - pv_mmu_ops.release_pd = xen_release_pd; + pv_mmu_ops.alloc_pte = xen_alloc_pte; + pv_mmu_ops.alloc_pmd = xen_alloc_pmd; + pv_mmu_ops.release_pte = xen_release_pte; + pv_mmu_ops.release_pmd = xen_release_pmd; pv_mmu_ops.set_pte = xen_set_pte; setup_shared_info(); @@ -994,7 +1008,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .read_pmc = native_read_pmc, .iret = xen_iret, - .irq_enable_syscall_ret = NULL, /* never called */ + .irq_enable_syscall_ret = xen_sysexit, .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, @@ -1059,11 +1073,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .pte_update = paravirt_nop, .pte_update_defer = paravirt_nop, - .alloc_pt = xen_alloc_pt_init, - .release_pt = xen_release_pt_init, - .alloc_pd = xen_alloc_pt_init, - .alloc_pd_clone = paravirt_nop, - .release_pd = xen_release_pt_init, + .alloc_pte = xen_alloc_pte_init, + .release_pte = xen_release_pte_init, + .alloc_pmd = xen_alloc_pte_init, + .alloc_pmd_clone = paravirt_nop, + .release_pmd = xen_release_pte_init, #ifdef CONFIG_HIGHPTE .kmap_atomic_pte = xen_kmap_atomic_pte, diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c deleted file mode 100644 index dcf613e1758..00000000000 --- a/arch/x86/xen/events.c +++ /dev/null @@ -1,591 +0,0 @@ -/* - * Xen event channels - * - * Xen models interrupts with abstract event channels. Because each - * domain gets 1024 event channels, but NR_IRQ is not that large, we - * must dynamically map irqs<->event channels. The event channels - * interface with the rest of the kernel by defining a xen interrupt - * chip. When an event is recieved, it is mapped to an irq and sent - * through the normal interrupt processing path. - * - * There are four kinds of events which can be mapped to an event - * channel: - * - * 1. Inter-domain notifications. This includes all the virtual - * device events, since they're driven by front-ends in another domain - * (typically dom0). - * 2. VIRQs, typically used for timers. These are per-cpu events. - * 3. IPIs. - * 4. Hardware interrupts. Not supported at present. - * - * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 - */ - -#include <linux/linkage.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/module.h> -#include <linux/string.h> - -#include <asm/ptrace.h> -#include <asm/irq.h> -#include <asm/sync_bitops.h> -#include <asm/xen/hypercall.h> -#include <asm/xen/hypervisor.h> - -#include <xen/events.h> -#include <xen/interface/xen.h> -#include <xen/interface/event_channel.h> - -#include "xen-ops.h" - -/* - * This lock protects updates to the following mapping and reference-count - * arrays. The lock does not need to be acquired to read the mapping tables. - */ -static DEFINE_SPINLOCK(irq_mapping_update_lock); - -/* IRQ <-> VIRQ mapping. */ -static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; - -/* IRQ <-> IPI mapping */ -static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1}; - -/* Packed IRQ information: binding type, sub-type index, and event channel. */ -struct packed_irq -{ - unsigned short evtchn; - unsigned char index; - unsigned char type; -}; - -static struct packed_irq irq_info[NR_IRQS]; - -/* Binding types. */ -enum { - IRQT_UNBOUND, - IRQT_PIRQ, - IRQT_VIRQ, - IRQT_IPI, - IRQT_EVTCHN -}; - -/* Convenient shorthand for packed representation of an unbound IRQ. */ -#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) - -static int evtchn_to_irq[NR_EVENT_CHANNELS] = { - [0 ... NR_EVENT_CHANNELS-1] = -1 -}; -static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; -static u8 cpu_evtchn[NR_EVENT_CHANNELS]; - -/* Reference counts for bindings to IRQs. */ -static int irq_bindcount[NR_IRQS]; - -/* Xen will never allocate port zero for any purpose. */ -#define VALID_EVTCHN(chn) ((chn) != 0) - -/* - * Force a proper event-channel callback from Xen after clearing the - * callback mask. We do this in a very simple manner, by making a call - * down into Xen. The pending flag will be checked by Xen on return. - */ -void force_evtchn_callback(void) -{ - (void)HYPERVISOR_xen_version(0, NULL); -} -EXPORT_SYMBOL_GPL(force_evtchn_callback); - -static struct irq_chip xen_dynamic_chip; - -/* Constructor for packed IRQ information. */ -static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) -{ - return (struct packed_irq) { evtchn, index, type }; -} - -/* - * Accessors for packed IRQ information. - */ -static inline unsigned int evtchn_from_irq(int irq) -{ - return irq_info[irq].evtchn; -} - -static inline unsigned int index_from_irq(int irq) -{ - return irq_info[irq].index; -} - -static inline unsigned int type_from_irq(int irq) -{ - return irq_info[irq].type; -} - -static inline unsigned long active_evtchns(unsigned int cpu, - struct shared_info *sh, - unsigned int idx) -{ - return (sh->evtchn_pending[idx] & - cpu_evtchn_mask[cpu][idx] & - ~sh->evtchn_mask[idx]); -} - -static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) -{ - int irq = evtchn_to_irq[chn]; - - BUG_ON(irq == -1); -#ifdef CONFIG_SMP - irq_desc[irq].affinity = cpumask_of_cpu(cpu); -#endif - - __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); - __set_bit(chn, cpu_evtchn_mask[cpu]); - - cpu_evtchn[chn] = cpu; -} - -static void init_evtchn_cpu_bindings(void) -{ -#ifdef CONFIG_SMP - int i; - /* By default all event channels notify CPU#0. */ - for (i = 0; i < NR_IRQS; i++) - irq_desc[i].affinity = cpumask_of_cpu(0); -#endif - - memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); - memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); -} - -static inline unsigned int cpu_from_evtchn(unsigned int evtchn) -{ - return cpu_evtchn[evtchn]; -} - -static inline void clear_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - sync_clear_bit(port, &s->evtchn_pending[0]); -} - -static inline void set_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - sync_set_bit(port, &s->evtchn_pending[0]); -} - - -/** - * notify_remote_via_irq - send event to remote end of event channel via irq - * @irq: irq of event channel to send event to - * - * Unlike notify_remote_via_evtchn(), this is safe to use across - * save/restore. Notifications on a broken connection are silently - * dropped. - */ -void notify_remote_via_irq(int irq) -{ - int evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - notify_remote_via_evtchn(evtchn); -} -EXPORT_SYMBOL_GPL(notify_remote_via_irq); - -static void mask_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - sync_set_bit(port, &s->evtchn_mask[0]); -} - -static void unmask_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - unsigned int cpu = get_cpu(); - - BUG_ON(!irqs_disabled()); - - /* Slow path (hypercall) if this is a non-local port. */ - if (unlikely(cpu != cpu_from_evtchn(port))) { - struct evtchn_unmask unmask = { .port = port }; - (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); - } else { - struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); - - sync_clear_bit(port, &s->evtchn_mask[0]); - - /* - * The following is basically the equivalent of - * 'hw_resend_irq'. Just like a real IO-APIC we 'lose - * the interrupt edge' if the channel is masked. - */ - if (sync_test_bit(port, &s->evtchn_pending[0]) && - !sync_test_and_set_bit(port / BITS_PER_LONG, - &vcpu_info->evtchn_pending_sel)) - vcpu_info->evtchn_upcall_pending = 1; - } - - put_cpu(); -} - -static int find_unbound_irq(void) -{ - int irq; - - /* Only allocate from dynirq range */ - for (irq = 0; irq < NR_IRQS; irq++) - if (irq_bindcount[irq] == 0) - break; - - if (irq == NR_IRQS) - panic("No available IRQ to bind to: increase NR_IRQS!\n"); - - return irq; -} - -int bind_evtchn_to_irq(unsigned int evtchn) -{ - int irq; - - spin_lock(&irq_mapping_update_lock); - - irq = evtchn_to_irq[evtchn]; - - if (irq == -1) { - irq = find_unbound_irq(); - - dynamic_irq_init(irq); - set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_level_irq, "event"); - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); - } - - irq_bindcount[irq]++; - - spin_unlock(&irq_mapping_update_lock); - - return irq; -} -EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); - -static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) -{ - struct evtchn_bind_ipi bind_ipi; - int evtchn, irq; - - spin_lock(&irq_mapping_update_lock); - - irq = per_cpu(ipi_to_irq, cpu)[ipi]; - if (irq == -1) { - irq = find_unbound_irq(); - if (irq < 0) - goto out; - - dynamic_irq_init(irq); - set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_level_irq, "ipi"); - - bind_ipi.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, - &bind_ipi) != 0) - BUG(); - evtchn = bind_ipi.port; - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); - - per_cpu(ipi_to_irq, cpu)[ipi] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - } - - irq_bindcount[irq]++; - - out: - spin_unlock(&irq_mapping_update_lock); - return irq; -} - - -static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) -{ - struct evtchn_bind_virq bind_virq; - int evtchn, irq; - - spin_lock(&irq_mapping_update_lock); - - irq = per_cpu(virq_to_irq, cpu)[virq]; - - if (irq == -1) { - bind_virq.virq = virq; - bind_virq.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, - &bind_virq) != 0) - BUG(); - evtchn = bind_virq.port; - - irq = find_unbound_irq(); - - dynamic_irq_init(irq); - set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_level_irq, "virq"); - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); - - per_cpu(virq_to_irq, cpu)[virq] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - } - - irq_bindcount[irq]++; - - spin_unlock(&irq_mapping_update_lock); - - return irq; -} - -static void unbind_from_irq(unsigned int irq) -{ - struct evtchn_close close; - int evtchn = evtchn_from_irq(irq); - - spin_lock(&irq_mapping_update_lock); - - if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { - close.port = evtchn; - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) - BUG(); - - switch (type_from_irq(irq)) { - case IRQT_VIRQ: - per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) - [index_from_irq(irq)] = -1; - break; - default: - break; - } - - /* Closed ports are implicitly re-bound to VCPU0. */ - bind_evtchn_to_cpu(evtchn, 0); - - evtchn_to_irq[evtchn] = -1; - irq_info[irq] = IRQ_UNBOUND; - - dynamic_irq_init(irq); - } - - spin_unlock(&irq_mapping_update_lock); -} - -int bind_evtchn_to_irqhandler(unsigned int evtchn, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, void *dev_id) -{ - unsigned int irq; - int retval; - - irq = bind_evtchn_to_irq(evtchn); - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) { - unbind_from_irq(irq); - return retval; - } - - return irq; -} -EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); - -int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - unsigned int irq; - int retval; - - irq = bind_virq_to_irq(virq, cpu); - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) { - unbind_from_irq(irq); - return retval; - } - - return irq; -} -EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); - -int bind_ipi_to_irqhandler(enum ipi_vector ipi, - unsigned int cpu, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, - void *dev_id) -{ - int irq, retval; - - irq = bind_ipi_to_irq(ipi, cpu); - if (irq < 0) - return irq; - - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) { - unbind_from_irq(irq); - return retval; - } - - return irq; -} - -void unbind_from_irqhandler(unsigned int irq, void *dev_id) -{ - free_irq(irq, dev_id); - unbind_from_irq(irq); -} -EXPORT_SYMBOL_GPL(unbind_from_irqhandler); - -void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) -{ - int irq = per_cpu(ipi_to_irq, cpu)[vector]; - BUG_ON(irq < 0); - notify_remote_via_irq(irq); -} - - -/* - * Search the CPUs pending events bitmasks. For each one found, map - * the event number to an irq, and feed it into do_IRQ() for - * handling. - * - * Xen uses a two-level bitmap to speed searching. The first level is - * a bitset of words which contain pending event bits. The second - * level is a bitset of pending events themselves. - */ -void xen_evtchn_do_upcall(struct pt_regs *regs) -{ - int cpu = get_cpu(); - struct shared_info *s = HYPERVISOR_shared_info; - struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); - unsigned long pending_words; - - vcpu_info->evtchn_upcall_pending = 0; - - /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ - pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); - while (pending_words != 0) { - unsigned long pending_bits; - int word_idx = __ffs(pending_words); - pending_words &= ~(1UL << word_idx); - - while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { - int bit_idx = __ffs(pending_bits); - int port = (word_idx * BITS_PER_LONG) + bit_idx; - int irq = evtchn_to_irq[port]; - - if (irq != -1) { - regs->orig_ax = ~irq; - do_IRQ(regs); - } - } - } - - put_cpu(); -} - -/* Rebind an evtchn so that it gets delivered to a specific cpu */ -static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) -{ - struct evtchn_bind_vcpu bind_vcpu; - int evtchn = evtchn_from_irq(irq); - - if (!VALID_EVTCHN(evtchn)) - return; - - /* Send future instances of this interrupt to other vcpu. */ - bind_vcpu.port = evtchn; - bind_vcpu.vcpu = tcpu; - - /* - * If this fails, it usually just indicates that we're dealing with a - * virq or IPI channel, which don't actually need to be rebound. Ignore - * it, but don't do the xenlinux-level rebind in that case. - */ - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) - bind_evtchn_to_cpu(evtchn, tcpu); -} - - -static void set_affinity_irq(unsigned irq, cpumask_t dest) -{ - unsigned tcpu = first_cpu(dest); - rebind_irq_to_cpu(irq, tcpu); -} - -static void enable_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - unmask_evtchn(evtchn); -} - -static void disable_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - mask_evtchn(evtchn); -} - -static void ack_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - - move_native_irq(irq); - - if (VALID_EVTCHN(evtchn)) - clear_evtchn(evtchn); -} - -static int retrigger_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - int ret = 0; - - if (VALID_EVTCHN(evtchn)) { - set_evtchn(evtchn); - ret = 1; - } - - return ret; -} - -static struct irq_chip xen_dynamic_chip __read_mostly = { - .name = "xen-dyn", - .mask = disable_dynirq, - .unmask = enable_dynirq, - .ack = ack_dynirq, - .set_affinity = set_affinity_irq, - .retrigger = retrigger_dynirq, -}; - -void __init xen_init_IRQ(void) -{ - int i; - - init_evtchn_cpu_bindings(); - - /* No event channels are 'live' right now. */ - for (i = 0; i < NR_EVENT_CHANNELS; i++) - mask_evtchn(i); - - /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - for (i = 0; i < NR_IRQS; i++) - irq_bindcount[i] = 0; - - irq_ctx_init(smp_processor_id()); -} diff --git a/arch/x86/xen/features.c b/arch/x86/xen/features.c deleted file mode 100644 index 0707714e40d..00000000000 --- a/arch/x86/xen/features.c +++ /dev/null @@ -1,29 +0,0 @@ -/****************************************************************************** - * features.c - * - * Xen feature flags. - * - * Copyright (c) 2006, Ian Campbell, XenSource Inc. - */ -#include <linux/types.h> -#include <linux/cache.h> -#include <linux/module.h> -#include <asm/xen/hypervisor.h> -#include <xen/features.h> - -u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; -EXPORT_SYMBOL_GPL(xen_features); - -void xen_setup_features(void) -{ - struct xen_feature_info fi; - int i, j; - - for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { - fi.submap_idx = i; - if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) - break; - for (j = 0; j < 32; j++) - xen_features[i * 32 + j] = !!(fi.submap & 1<<j); - } -} diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c new file mode 100644 index 00000000000..49ba9b5224d --- /dev/null +++ b/arch/x86/xen/grant-table.c @@ -0,0 +1,91 @@ +/****************************************************************************** + * grant_table.c + * x86 specific part + * + * Granting foreign access to our memory reservation. + * + * Copyright (c) 2005-2006, Christopher Clark + * Copyright (c) 2004-2005, K A Fraser + * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan. Split out x86 specific part. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> + +#include <xen/interface/xen.h> +#include <xen/page.h> +#include <xen/grant_table.h> + +#include <asm/pgtable.h> + +static int map_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + unsigned long **frames = (unsigned long **)data; + + set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); + (*frames)++; + return 0; +} + +static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + + set_pte_at(&init_mm, addr, pte, __pte(0)); + return 0; +} + +int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + struct grant_entry **__shared) +{ + int rc; + struct grant_entry *shared = *__shared; + + if (shared == NULL) { + struct vm_struct *area = + xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes); + BUG_ON(area == NULL); + shared = area->addr; + *__shared = shared; + } + + rc = apply_to_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * nr_gframes, + map_pte_fn, &frames); + return rc; +} + +void arch_gnttab_unmap_shared(struct grant_entry *shared, + unsigned long nr_gframes) +{ + apply_to_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); +} diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 2a054ef2a3d..126766d43ae 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -156,6 +156,10 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { + /* updates to init_mm may be done without lock */ + if (mm == &init_mm) + preempt_disable(); + if (mm == current->mm || mm == &init_mm) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { struct multicall_space mcs; @@ -163,14 +167,61 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); xen_mc_issue(PARAVIRT_LAZY_MMU); - return; + goto out; } else if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0) - return; + goto out; } xen_set_pte(ptep, pteval); + +out: + if (mm == &init_mm) + preempt_enable(); +} + +pteval_t xen_pte_val(pte_t pte) +{ + pteval_t ret = pte.pte; + + if (ret & _PAGE_PRESENT) + ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; + + return ret; +} + +pgdval_t xen_pgd_val(pgd_t pgd) +{ + pgdval_t ret = pgd.pgd; + if (ret & _PAGE_PRESENT) + ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; + return ret; +} + +pte_t xen_make_pte(pteval_t pte) +{ + if (pte & _PAGE_PRESENT) { + pte = phys_to_machine(XPADDR(pte)).maddr; + pte &= ~(_PAGE_PCD | _PAGE_PWT); + } + + return (pte_t){ .pte = pte }; } +pgd_t xen_make_pgd(pgdval_t pgd) +{ + if (pgd & _PAGE_PRESENT) + pgd = phys_to_machine(XPADDR(pgd)).maddr; + + return (pgd_t){ pgd }; +} + +pmdval_t xen_pmd_val(pmd_t pmd) +{ + pmdval_t ret = native_pmd_val(pmd); + if (ret & _PAGE_PRESENT) + ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; + return ret; +} #ifdef CONFIG_X86_PAE void xen_set_pud(pud_t *ptr, pud_t val) { @@ -214,100 +265,18 @@ void xen_pmd_clear(pmd_t *pmdp) xen_set_pmd(pmdp, __pmd(0)); } -unsigned long long xen_pte_val(pte_t pte) +pmd_t xen_make_pmd(pmdval_t pmd) { - unsigned long long ret = 0; - - if (pte.pte_low) { - ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low; - ret = machine_to_phys(XMADDR(ret)).paddr | 1; - } - - return ret; -} - -unsigned long long xen_pmd_val(pmd_t pmd) -{ - unsigned long long ret = pmd.pmd; - if (ret) - ret = machine_to_phys(XMADDR(ret)).paddr | 1; - return ret; -} - -unsigned long long xen_pgd_val(pgd_t pgd) -{ - unsigned long long ret = pgd.pgd; - if (ret) - ret = machine_to_phys(XMADDR(ret)).paddr | 1; - return ret; -} - -pte_t xen_make_pte(unsigned long long pte) -{ - if (pte & _PAGE_PRESENT) { - pte = phys_to_machine(XPADDR(pte)).maddr; - pte &= ~(_PAGE_PCD | _PAGE_PWT); - } - - return (pte_t){ .pte = pte }; -} - -pmd_t xen_make_pmd(unsigned long long pmd) -{ - if (pmd & 1) + if (pmd & _PAGE_PRESENT) pmd = phys_to_machine(XPADDR(pmd)).maddr; - return (pmd_t){ pmd }; -} - -pgd_t xen_make_pgd(unsigned long long pgd) -{ - if (pgd & _PAGE_PRESENT) - pgd = phys_to_machine(XPADDR(pgd)).maddr; - - return (pgd_t){ pgd }; + return native_make_pmd(pmd); } #else /* !PAE */ void xen_set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; } - -unsigned long xen_pte_val(pte_t pte) -{ - unsigned long ret = pte.pte_low; - - if (ret & _PAGE_PRESENT) - ret = machine_to_phys(XMADDR(ret)).paddr; - - return ret; -} - -unsigned long xen_pgd_val(pgd_t pgd) -{ - unsigned long ret = pgd.pgd; - if (ret) - ret = machine_to_phys(XMADDR(ret)).paddr | 1; - return ret; -} - -pte_t xen_make_pte(unsigned long pte) -{ - if (pte & _PAGE_PRESENT) { - pte = phys_to_machine(XPADDR(pte)).maddr; - pte &= ~(_PAGE_PCD | _PAGE_PWT); - } - - return (pte_t){ pte }; -} - -pgd_t xen_make_pgd(unsigned long pgd) -{ - if (pgd & _PAGE_PRESENT) - pgd = phys_to_machine(XPADDR(pgd)).maddr; - - return (pgd_t){ pgd }; -} #endif /* CONFIG_X86_PAE */ /* @@ -418,7 +387,7 @@ static void xen_do_pin(unsigned level, unsigned long pfn) static int pin_page(struct page *page, enum pt_level level) { - unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags); + unsigned pgfl = TestSetPagePinned(page); int flush; if (pgfl) @@ -499,7 +468,7 @@ void __init xen_mark_init_mm_pinned(void) static int unpin_page(struct page *page, enum pt_level level) { - unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags); + unsigned pgfl = TestClearPagePinned(page); if (pgfl && !PageHighMem(page)) { void *pt = lowmem_page_address(page); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 2341492bf7a..82517e4a752 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -16,6 +16,7 @@ #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <xen/interface/callback.h> #include <xen/interface/physdev.h> #include <xen/features.h> @@ -68,6 +69,24 @@ static void __init fiddle_vdso(void) *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; } +void xen_enable_sysenter(void) +{ + int cpu = smp_processor_id(); + extern void xen_sysenter_target(void); + /* Mask events on entry, even though they get enabled immediately */ + static struct callback_register sysenter = { + .type = CALLBACKTYPE_sysenter, + .address = { __KERNEL_CS, (unsigned long)xen_sysenter_target }, + .flags = CALLBACKF_mask_events, + }; + + if (!boot_cpu_has(X86_FEATURE_SEP) || + HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) { + clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); + } +} + void __init xen_arch_setup(void) { struct physdev_set_iopl set_iopl; @@ -82,6 +101,8 @@ void __init xen_arch_setup(void) HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, __KERNEL_CS, (unsigned long)xen_failsafe_callback); + xen_enable_sysenter(); + set_iopl.iopl = 1; rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); if (rc != 0) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index e340ff92f6b..94e69000f98 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -36,8 +36,9 @@ #include "mmu.h" static cpumask_t xen_cpu_initialized_map; -static DEFINE_PER_CPU(int, resched_irq); -static DEFINE_PER_CPU(int, callfunc_irq); +static DEFINE_PER_CPU(int, resched_irq) = -1; +static DEFINE_PER_CPU(int, callfunc_irq) = -1; +static DEFINE_PER_CPU(int, debug_irq) = -1; /* * Structure and data for smp_call_function(). This is designed to minimise @@ -72,6 +73,7 @@ static __cpuinit void cpu_bringup_and_idle(void) int cpu = smp_processor_id(); cpu_init(); + xen_enable_sysenter(); preempt_disable(); per_cpu(cpu_state, cpu) = CPU_ONLINE; @@ -88,9 +90,7 @@ static __cpuinit void cpu_bringup_and_idle(void) static int xen_smp_intr_init(unsigned int cpu) { int rc; - const char *resched_name, *callfunc_name; - - per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; + const char *resched_name, *callfunc_name, *debug_name; resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, @@ -114,6 +114,14 @@ static int xen_smp_intr_init(unsigned int cpu) goto fail; per_cpu(callfunc_irq, cpu) = rc; + debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); + rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, + IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING, + debug_name, NULL); + if (rc < 0) + goto fail; + per_cpu(debug_irq, cpu) = rc; + return 0; fail: @@ -121,6 +129,8 @@ static int xen_smp_intr_init(unsigned int cpu) unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); if (per_cpu(callfunc_irq, cpu) >= 0) unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); + if (per_cpu(debug_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); return rc; } @@ -183,7 +193,7 @@ void __init xen_smp_prepare_cpus(unsigned int max_cpus) /* Restrict the possible_map according to max_cpus. */ while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { - for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--) + for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) continue; cpu_clear(cpu, cpu_possible_map); } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index fe161ed4b01..2497a30f41d 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -108,6 +108,20 @@ ENDPATCH(xen_restore_fl_direct) RELOC(xen_restore_fl_direct, 2b+1) /* + We can't use sysexit directly, because we're not running in ring0. + But we can easily fake it up using iret. Assuming xen_sysexit + is jumped to with a standard stack frame, we can just strip it + back to a standard iret frame and use iret. + */ +ENTRY(xen_sysexit) + movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ + orl $X86_EFLAGS_IF, PT_EFLAGS(%esp) + lea PT_EIP(%esp), %esp + + jmp xen_iret +ENDPROC(xen_sysexit) + +/* This is run where a normal iret would be run, with the same stack setup: 8: eflags 4: cs @@ -184,8 +198,12 @@ iret_restore_end: region is OK. */ je xen_hypervisor_callback - iret +1: iret xen_iret_end_crit: +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous hyper_iret: /* put this out of line since its very rarely used */ @@ -219,9 +237,7 @@ hyper_iret: ds } SAVE_ALL state eax } : : - ebx } - ---------------- - return addr <- esp + ebx }<- esp ---------------- In order to deliver the nested exception properly, we need to shift @@ -236,10 +252,8 @@ hyper_iret: it's usermode state which we eventually need to restore. */ ENTRY(xen_iret_crit_fixup) - /* offsets +4 for return address */ - /* - Paranoia: Make sure we're really coming from userspace. + Paranoia: Make sure we're really coming from kernel space. One could imagine a case where userspace jumps into the critical range address, but just before the CPU delivers a GP, it decides to deliver an interrupt instead. Unlikely? @@ -248,32 +262,32 @@ ENTRY(xen_iret_crit_fixup) jump instruction itself, not the destination, but some virtual environments get this wrong. */ - movl PT_CS+4(%esp), %ecx + movl PT_CS(%esp), %ecx andl $SEGMENT_RPL_MASK, %ecx cmpl $USER_RPL, %ecx je 2f - lea PT_ORIG_EAX+4(%esp), %esi - lea PT_EFLAGS+4(%esp), %edi + lea PT_ORIG_EAX(%esp), %esi + lea PT_EFLAGS(%esp), %edi /* If eip is before iret_restore_end then stack hasn't been restored yet. */ cmp $iret_restore_end, %eax jae 1f - movl 0+4(%edi),%eax /* copy EAX */ - movl %eax, PT_EAX+4(%esp) + movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ + movl %eax, PT_EAX(%esp) lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ /* set up the copy */ 1: std - mov $(PT_EIP+4) / 4, %ecx /* copy ret+saved regs up to orig_eax */ + mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */ rep movsl cld lea 4(%edi),%esp /* point esp to new frame */ -2: ret +2: jmp xen_do_upcall /* diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 956a491ea99..f1063ae0803 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -2,6 +2,8 @@ #define XEN_OPS_H #include <linux/init.h> +#include <linux/irqreturn.h> +#include <xen/xen-ops.h> /* These are code, but not functions. Defined in entry.S */ extern const char xen_hypervisor_callback[]; @@ -9,7 +11,6 @@ extern const char xen_failsafe_callback[]; void xen_copy_trap_info(struct trap_info *traps); -DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); @@ -19,6 +20,7 @@ extern struct shared_info *HYPERVISOR_shared_info; char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void __init xen_init_IRQ(void); +void xen_enable_sysenter(void); void xen_setup_timer(int cpu); void xen_setup_cpu_clockevents(void); @@ -28,6 +30,8 @@ unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); unsigned long long xen_sched_clock(void); +irqreturn_t xen_debug_interrupt(int irq, void *dev_id); + bool xen_vcpu_stolen(int vcpu); void xen_mark_init_mm_pinned(void); @@ -64,4 +68,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void); DECL_ASM(void, xen_restore_fl_direct, unsigned long); void xen_iret(void); +void xen_sysexit(void); + #endif /* XEN_OPS_H */ |