diff options
139 files changed, 3705 insertions, 1263 deletions
diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt index dda7ecdde87..28d014714ab 100644 --- a/Documentation/arm/Samsung-S3C24XX/Overview.txt +++ b/Documentation/arm/Samsung-S3C24XX/Overview.txt @@ -76,6 +76,15 @@ Machines A S3C2410 based PDA from Acer. There is a Wiki page at http://handhelds.org/moin/moin.cgi/AcerN30Documentation . + AML M5900 + + American Microsystems' M5900 + + Nex Vision Nexcoder + Nex Vision Otom + + Two machines by Nex Vision + Adding New Machines ------------------- @@ -115,6 +124,10 @@ RTC Support for the onboard RTC unit, including alarm function. + This has recently been upgraded to use the new RTC core, + and the module has been renamed to rtc-s3c to fit in with + the new rtc naming scheme. + Watchdog -------- @@ -128,7 +141,7 @@ NAND The current kernels now have support for the s3c2410 NAND controller. If there are any problems the latest linux-mtd - CVS can be found from http://www.linux-mtd.infradead.org/ + code can be found from http://www.linux-mtd.infradead.org/ Serial @@ -168,6 +181,21 @@ Suspend to RAM See Suspend.txt for more information. +SPI +--- + + SPI drivers are available for both the in-built hardware + (although there is no DMA support yet) and a generic + GPIO based solution. + + +LEDs +---- + + There is support for GPIO based LEDs via a platform driver + in the LED subsystem. + + Platform Data ------------- diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 30f3c8c9c12..f2024df7ebe 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -226,6 +226,23 @@ Who: Jean Delvare <khali@linux-fr.org> --------------------------- +What: i2c_adapter.dev + i2c_adapter.list +When: July 2007 +Why: Superfluous, given i2c_adapter.class_dev: + * The "dev" was a stand-in for the physical device node that legacy + drivers would not have; but now it's almost always present. Any + remaining legacy drivers must upgrade (they now trigger warnings). + * The "list" duplicates class device children. + The delay in removing this is so upgraded lm_sensors and libsensors + can get deployed. (Removal causes minor changes in the sysfs layout, + notably the location of the adapter type name and parenting the i2c + client hardware directly from their controller.) +Who: Jean Delvare <khali@linux-fr.org>, + David Brownell <dbrownell@users.sourceforge.net> + +--------------------------- + What: IPv4 only connection tracking/NAT/helpers When: 2.6.22 Why: The new layer 3 independant connection tracking replaces the old diff --git a/Documentation/usb/acm.txt b/Documentation/usb/acm.txt index 737d6104c3f..17f5c2e1a57 100644 --- a/Documentation/usb/acm.txt +++ b/Documentation/usb/acm.txt @@ -46,6 +46,10 @@ Abstract Control Model (USB CDC ACM) specification. 3Com USR ISDN Pro TA + Some cell phones also connect via USB. I know the following phones work: + + SonyEricsson K800i + Unfortunately many modems and most ISDN TAs use proprietary interfaces and thus won't work with this drivers. Check for ACM compliance before buying. diff --git a/MAINTAINERS b/MAINTAINERS index d5a97d3e23c..2bd34ef58ff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -532,13 +532,13 @@ L: netdev@vger.kernel.org S: Maintained ASUS ACPI EXTRAS DRIVER +P: Corentin Chary +M: corentincj@iksaif.net P: Karol Kozimor M: sziwan@users.sourceforge.net -P: Julien Lerouge -M: julien.lerouge@free.fr L: acpi4asus-user@lists.sourceforge.net W: http://sourceforge.net/projects/acpi4asus -W: http://julien.lerouge.free.fr +W: http://xf.iksaif.net/acpi4asus S: Maintained ATA OVER ETHERNET DRIVER @@ -1564,10 +1564,9 @@ T: git kernel.org:/pub/scm/linux/kernel/git/bart/ide-2.6.git S: Maintained IDE/ATAPI CDROM DRIVER -P: Jens Axboe -M: axboe@kernel.dk -L: linux-kernel@vger.kernel.org -W: http://www.kernel.dk +P: Alan Cox +M: alan@lxorguk.ukuu.org.uk +L: linux-ide@vger.kernel.org S: Maintained IDE/ATAPI FLOPPY DRIVERS @@ -2580,6 +2579,12 @@ P: Adam Belay M: ambx1@neo.rr.com S: Maintained +PNXxxxx I2C DRIVER +P: Vitaly Wool +M: vitalywool@gmail.com +L: i2c@lm-sensors.org +S: Maintained + PPP PROTOCOL DRIVERS AND COMPRESSORS P: Paul Mackerras M: paulus@samba.org @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 20 -EXTRAVERSION =-rc3 +EXTRAVERSION =-rc4 NAME = Homicidal Dwarf Hamster # *DOCUMENTATION* diff --git a/arch/arm/mach-iop13xx/io.c b/arch/arm/mach-iop13xx/io.c index fbf9f88e46e..e79a1b62600 100644 --- a/arch/arm/mach-iop13xx/io.c +++ b/arch/arm/mach-iop13xx/io.c @@ -21,6 +21,25 @@ #include <asm/hardware.h> #include <asm/io.h> +void * __iomem __iop13xx_io(unsigned long io_addr) +{ + void __iomem * io_virt; + + switch (io_addr) { + case IOP13XX_PCIE_LOWER_IO_PA ... IOP13XX_PCIE_UPPER_IO_PA: + io_virt = (void *) IOP13XX_PCIE_IO_PHYS_TO_VIRT(io_addr); + break; + case IOP13XX_PCIX_LOWER_IO_PA ... IOP13XX_PCIX_UPPER_IO_PA: + io_virt = (void *) IOP13XX_PCIX_IO_PHYS_TO_VIRT(io_addr); + break; + default: + BUG(); + } + + return io_virt; +} +EXPORT_SYMBOL(__iop13xx_io); + void * __iomem __iop13xx_ioremap(unsigned long cookie, size_t size, unsigned long flags) { diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c index 6ae605857ca..9de1278d234 100644 --- a/arch/arm/mach-pxa/generic.c +++ b/arch/arm/mach-pxa/generic.c @@ -76,7 +76,9 @@ unsigned long long sched_clock(void) /* * 96-bit math to perform tick * NSEC_PER_SEC / CLOCK_TICK_RATE for * any value of CLOCK_TICK_RATE. Max value is in the 80 thousand - * years range which is nice, but with higher computation cost. + * years range and truncation to unsigned long long limits it to + * sched_clock's max range of ~584 years. This is nice but with + * higher computation cost. */ { union { diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 3775b8f3842..ee2beb40041 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -101,7 +101,7 @@ static struct irqaction pxa_timer_irq = { .handler = pxa_timer_interrupt, }; -cycle_t pxa_get_cycles(void) +static cycle_t pxa_get_cycles(void) { return OSCR; } @@ -134,13 +134,13 @@ static void __init pxa_timer_init(void) OSMR0 = OSCR + LATCH; /* set initial match */ local_irq_restore(flags); - /* on PXA OSCR runs continiously and is not written to, so we can use it - * as clock source directly. + /* + * OSCR runs continuously on PXA and is not written to, + * so we can use it as clock source directly. */ clocksource_pxa.mult = clocksource_hz2mult(CLOCK_TICK_RATE, clocksource_pxa.shift); clocksource_register(&clocksource_pxa); - } #ifdef CONFIG_NO_IDLE_HZ diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c index 717322a0916..fa860e716b4 100644 --- a/arch/arm/mach-s3c2410/dma.c +++ b/arch/arm/mach-s3c2410/dma.c @@ -1053,11 +1053,11 @@ int s3c2410_dma_config(dmach_t channel, if (chan == NULL) return -EINVAL; - printk("Initial dcon is %08x\n", dcon); + pr_debug("%s: Initial dcon is %08x\n", __FUNCTION__, dcon); dcon |= chan->dcon & dma_sel.dcon_mask; - printk("New dcon is %08x\n", dcon); + pr_debug("%s: New dcon is %08x\n", __FUNCTION__, dcon); switch (xferunit) { case 1: diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c index 408b05ae6b9..ded0e96d069 100644 --- a/arch/arm/mm/copypage-v4mc.c +++ b/arch/arm/mm/copypage-v4mc.c @@ -19,6 +19,7 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> +#include <asm/cacheflush.h> #include "mm.h" @@ -69,6 +70,11 @@ mc_copy_user_page(void *from, void *to) void v4_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) { + struct page *page = virt_to_page(kfrom); + + if (test_and_clear_bit(PG_dcache_dirty, &page->flags)) + __flush_dcache_page(page_mapping(page), page); + spin_lock(&minicache_lock); set_pte_ext(TOP_PTE(0xffff8000), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot), 0); diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 865777dec16..3adb79257f4 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -53,6 +53,10 @@ static void v6_copy_user_page_aliasing(void *kto, const void *kfrom, unsigned lo { unsigned int offset = CACHE_COLOUR(vaddr); unsigned long from, to; + struct page *page = virt_to_page(kfrom); + + if (test_and_clear_bit(PG_dcache_dirty, &page->flags)) + __flush_dcache_page(page_mapping(page), page); /* * Discard data in the kernel mapping for the new page. diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c index aea5da72359..2e455f82a4d 100644 --- a/arch/arm/mm/copypage-xscale.c +++ b/arch/arm/mm/copypage-xscale.c @@ -19,6 +19,7 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> +#include <asm/cacheflush.h> #include "mm.h" @@ -91,6 +92,11 @@ mc_copy_user_page(void *from, void *to) void xscale_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) { + struct page *page = virt_to_page(kfrom); + + if (test_and_clear_bit(PG_dcache_dirty, &page->flags)) + __flush_dcache_page(page_mapping(page), page); + spin_lock(&minicache_lock); set_pte_ext(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot), 0); diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index e26cc1f5994..490d9d18a7d 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -264,6 +264,18 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); } +static void vfp_enable(void *unused) +{ + u32 access = get_copro_access(); + + /* + * Enable full access to VFP (cp10 and cp11) + */ + set_copro_access(access | CPACC_FULL(10) | CPACC_FULL(11)); +} + +#include <linux/smp.h> + /* * VFP support code initialisation. */ @@ -288,6 +300,7 @@ static int __init vfp_init(void) * we just need to read the VFPSID register. */ vfpsid = fmrx(FPSID); + barrier(); printk(KERN_INFO "VFP support v0.3: "); if (VFP_arch) { @@ -301,6 +314,8 @@ static int __init vfp_init(void) } else if (vfpsid & FPSID_NODOUBLE) { printk("no double precision support\n"); } else { + smp_call_function(vfp_enable, NULL, 1, 1); + VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT; /* Extract the architecture version */ printk("implementor %02x architecture %d part %02x variant %x rev %x\n", (vfpsid & FPSID_IMPLEMENTER_MASK) >> FPSID_IMPLEMENTER_BIT, diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 0d67a0a1151..0dfee812811 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -777,6 +777,47 @@ config CRASH_DUMP PHYSICAL_START. For more details see Documentation/kdump/kdump.txt +config PHYSICAL_START + hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) + default "0x100000" + help + This gives the physical address where the kernel is loaded. + + If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then + bzImage will decompress itself to above physical address and + run from there. Otherwise, bzImage will run from the address where + it has been loaded by the boot loader and will ignore above physical + address. + + In normal kdump cases one does not have to set/change this option + as now bzImage can be compiled as a completely relocatable image + (CONFIG_RELOCATABLE=y) and be used to load and run from a different + address. This option is mainly useful for the folks who don't want + to use a bzImage for capturing the crash dump and want to use a + vmlinux instead. vmlinux is not relocatable hence a kernel needs + to be specifically compiled to run from a specific memory area + (normally a reserved region) and this option comes handy. + + So if you are using bzImage for capturing the crash dump, leave + the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y. + Otherwise if you plan to use vmlinux for capturing the crash dump + change this value to start of the reserved region (Typically 16MB + 0x1000000). In other words, it can be set based on the "X" value as + specified in the "crashkernel=YM@XM" command line boot parameter + passed to the panic-ed kernel. Typically this parameter is set as + crashkernel=64M@16M. Please take a look at + Documentation/kdump/kdump.txt for more details about crash dumps. + + Usage of bzImage for capturing the crash dump is recommended as + one does not have to build two kernels. Same kernel can be used + as production kernel and capture kernel. Above option should have + gone away after relocatable bzImage support is introduced. But it + is present because there are users out there who continue to use + vmlinux for dump capture. This option should go away down the + line. + + Don't change this unless you know what you are doing. + config RELOCATABLE bool "Build a relocatable kernel(EXPERIMENTAL)" depends on EXPERIMENTAL diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S index f395a4bb38b..3517a32aaf4 100644 --- a/arch/i386/boot/compressed/head.S +++ b/arch/i386/boot/compressed/head.S @@ -28,7 +28,7 @@ #include <asm/page.h> #include <asm/boot.h> -.section ".text.head" +.section ".text.head","ax",@progbits .globl startup_32 startup_32: diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 094300b3a81..cbcb2c27f48 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -333,7 +333,7 @@ acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end) /* * Parse Interrupt Source Override for the ACPI SCI */ -static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) +static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) { if (trigger == 0) /* compatible SCI trigger is level */ trigger = 3; diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 1b34c56f812..8689d62abd4 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -54,7 +54,7 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", }; -static struct cpu_dev * this_cpu = &default_cpu; +static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; static int __init cachesize_setup(char *str) { diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index b735458c6e3..10baa3501ed 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -373,8 +373,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, cpumask_t online_policy_cpus; struct drv_cmd cmd; unsigned int msr; - unsigned int next_state = 0; - unsigned int next_perf_state = 0; + unsigned int next_state = 0; /* Index into freq_table */ + unsigned int next_perf_state = 0; /* Index into perf table */ unsigned int i; int result = 0; @@ -420,6 +420,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, msr = (u32) perf->states[next_perf_state]. control & INTEL_MSR_RANGE; + cmd.val = get_cur_val(online_policy_cpus); cmd.val = (cmd.val & ~INTEL_MSR_RANGE) | msr; break; case SYSTEM_IO_CAPABLE: @@ -439,8 +440,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, else cpu_set(policy->cpu, cmd.mask); - freqs.old = data->freq_table[perf->state].frequency; - freqs.new = data->freq_table[next_perf_state].frequency; + freqs.old = perf->states[perf->state].core_frequency * 1000; + freqs.new = data->freq_table[next_state].frequency; for_each_cpu_mask(i, cmd.mask) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); @@ -677,6 +678,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) valid_states++; } data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; + perf->state = 0; result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); if (result) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 6d9c97a690f..e940e00b96c 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -52,6 +52,10 @@ #define CPU_EZRA_T 4 #define CPU_NEHEMIAH 5 +/* Flags */ +#define USE_ACPI_C3 (1 << 1) +#define USE_NORTHBRIDGE (1 << 2) + static int cpu_model; static unsigned int numscales=16; static unsigned int fsb; @@ -68,7 +72,7 @@ static unsigned int minmult, maxmult; static int can_scale_voltage; static struct acpi_processor *pr = NULL; static struct acpi_processor_cx *cx = NULL; -static int port22_en; +static u8 longhaul_flags; /* Module parameters */ static int scale_voltage; @@ -80,7 +84,6 @@ static int ignore_latency; /* Clock ratios multiplied by 10 */ static int clock_ratio[32]; static int eblcr_table[32]; -static unsigned int highest_speed, lowest_speed; /* kHz */ static int longhaul_version; static struct cpufreq_frequency_table *longhaul_table; @@ -178,7 +181,7 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) safe_halt(); /* Change frequency on next halt or sleep */ wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - if (port22_en) { + if (!cx_address) { ACPI_FLUSH_CPU_CACHE(); /* Invoke C1 */ halt(); @@ -189,7 +192,6 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) /* Dummy op - must do something useless after P_LVL3 read */ t = inl(acpi_fadt.xpm_tmr_blk.address); } - /* Disable bus ratio bit */ local_irq_disable(); longhaul.bits.RevisionKey = longhaul.bits.RevisionID; @@ -243,15 +245,14 @@ static void longhaul_setstate(unsigned int clock_ratio_index) outb(0xFF,0xA1); /* Overkill */ outb(0xFE,0x21); /* TMR0 only */ - if (pr->flags.bm_control) { + if (longhaul_flags & USE_NORTHBRIDGE) { + /* Disable AGP and PCI arbiters */ + outb(3, 0x22); + } else if ((pr != NULL) && pr->flags.bm_control) { /* Disable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1, ACPI_MTX_DO_NOT_LOCK); - } else if (port22_en) { - /* Disable AGP and PCI arbiters */ - outb(3, 0x22); } - switch (longhaul_version) { /* @@ -278,22 +279,25 @@ static void longhaul_setstate(unsigned int clock_ratio_index) * to work in practice. */ case TYPE_POWERSAVER: - /* Don't allow wakeup */ - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0, - ACPI_MTX_DO_NOT_LOCK); - do_powersaver(cx->address, clock_ratio_index); + if (longhaul_flags & USE_ACPI_C3) { + /* Don't allow wakeup */ + acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0, + ACPI_MTX_DO_NOT_LOCK); + do_powersaver(cx->address, clock_ratio_index); + } else { + do_powersaver(0, clock_ratio_index); + } break; } - if (pr->flags.bm_control) { + if (longhaul_flags & USE_NORTHBRIDGE) { + /* Enable arbiters */ + outb(0, 0x22); + } else if ((pr != NULL) && pr->flags.bm_control) { /* Enable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0, ACPI_MTX_DO_NOT_LOCK); - } else if (port22_en) { - /* Enable arbiters */ - outb(0, 0x22); } - outb(pic2_mask,0xA1); /* restore mask */ outb(pic1_mask,0x21); @@ -314,12 +318,12 @@ static void longhaul_setstate(unsigned int clock_ratio_index) #define ROUNDING 0xf -static int _guess(int guess) +static int _guess(int guess, int mult) { int target; - target = ((maxmult/10)*guess); - if (maxmult%10 != 0) + target = ((mult/10)*guess); + if (mult%10 != 0) target += (guess/2); target += ROUNDING/2; target &= ~ROUNDING; @@ -327,17 +331,17 @@ static int _guess(int guess) } -static int guess_fsb(void) +static int guess_fsb(int mult) { int speed = (cpu_khz/1000); int i; - int speeds[3] = { 66, 100, 133 }; + int speeds[] = { 66, 100, 133, 200 }; speed += ROUNDING/2; speed &= ~ROUNDING; - for (i=0; i<3; i++) { - if (_guess(speeds[i]) == speed) + for (i=0; i<4; i++) { + if (_guess(speeds[i], mult) == speed) return speeds[i]; } return 0; @@ -354,9 +358,7 @@ static int __init longhaul_get_ranges(void) 130, 150, 160, 140, -1, 155, -1, 145 }; unsigned int j, k = 0; union msr_longhaul longhaul; - unsigned long lo, hi; - unsigned int eblcr_fsb_table_v1[] = { 66, 133, 100, -1 }; - unsigned int eblcr_fsb_table_v2[] = { 133, 100, -1, 66 }; + int mult = 0; switch (longhaul_version) { case TYPE_LONGHAUL_V1: @@ -364,30 +366,18 @@ static int __init longhaul_get_ranges(void) /* Ugh, Longhaul v1 didn't have the min/max MSRs. Assume min=3.0x & max = whatever we booted at. */ minmult = 30; - maxmult = longhaul_get_cpu_mult(); - rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); - invalue = (lo & (1<<18|1<<19)) >>18; - if (cpu_model==CPU_SAMUEL || cpu_model==CPU_SAMUEL2) - fsb = eblcr_fsb_table_v1[invalue]; - else - fsb = guess_fsb(); + maxmult = mult = longhaul_get_cpu_mult(); break; case TYPE_POWERSAVER: /* Ezra-T */ if (cpu_model==CPU_EZRA_T) { + minmult = 30; rdmsrl (MSR_VIA_LONGHAUL, longhaul.val); invalue = longhaul.bits.MaxMHzBR; if (longhaul.bits.MaxMHzBR4) invalue += 16; - maxmult=ezra_t_multipliers[invalue]; - - invalue = longhaul.bits.MinMHzBR; - if (longhaul.bits.MinMHzBR4 == 1) - minmult = 30; - else - minmult = ezra_t_multipliers[invalue]; - fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB]; + maxmult = mult = ezra_t_multipliers[invalue]; break; } @@ -407,21 +397,16 @@ static int __init longhaul_get_ranges(void) * But it works, so we don't grumble. */ minmult=40; - maxmult=longhaul_get_cpu_mult(); - - /* Starting with the 1.2GHz parts, theres a 200MHz bus. */ - if ((cpu_khz/maxmult) > 13400) - fsb = 200; - else - fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB]; + maxmult = mult = longhaul_get_cpu_mult(); break; } } + fsb = guess_fsb(mult); dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", minmult/10, minmult%10, maxmult/10, maxmult%10); - if (fsb == -1) { + if (fsb == 0) { printk (KERN_INFO PFX "Invalid (reserved) FSB!\n"); return -EINVAL; } @@ -691,27 +676,32 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) /* Find ACPI data for processor */ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, &longhaul_walk_callback, NULL, (void *)&pr); - if (pr == NULL) - goto err_acpi; - if (longhaul_version == TYPE_POWERSAVER) { - /* Check ACPI support for C3 state */ + /* Check ACPI support for C3 state */ + if ((pr != NULL) && (longhaul_version == TYPE_POWERSAVER)) { cx = &pr->power.states[ACPI_STATE_C3]; if (cx->address > 0 && (cx->latency <= 1000 || ignore_latency != 0) ) { + longhaul_flags |= USE_ACPI_C3; goto print_support_type; } } + /* Check if northbridge is friendly */ + if (enable_arbiter_disable()) { + longhaul_flags |= USE_NORTHBRIDGE; + goto print_support_type; + } + + /* No ACPI C3 or we can't use it */ /* Check ACPI support for bus master arbiter disable */ - if (!pr->flags.bm_control) { - if (enable_arbiter_disable()) { - port22_en = 1; - } else { - goto err_acpi; - } + if ((pr == NULL) || !(pr->flags.bm_control)) { + printk(KERN_ERR PFX + "No ACPI support. Unsupported northbridge.\n"); + return -ENODEV; } + print_support_type: - if (!port22_en) { + if (!(longhaul_flags & USE_NORTHBRIDGE)) { printk (KERN_INFO PFX "Using ACPI support.\n"); } else { printk (KERN_INFO PFX "Using northbridge support.\n"); @@ -736,10 +726,6 @@ print_support_type: cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); return 0; - -err_acpi: - printk(KERN_ERR PFX "No ACPI support. Unsupported northbridge. Aborting.\n"); - return -ENODEV; } static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) @@ -774,8 +760,8 @@ static int __init longhaul_init(void) #ifdef CONFIG_SMP if (num_online_cpus() > 1) { - return -ENODEV; printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); + return -ENODEV; } #endif #ifdef CONFIG_X86_IO_APIC diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 5113e923163..f43b987f952 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -533,9 +533,9 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); - printk("speedstep-centrino with X86_SPEEDSTEP_CENTRINO_ACPI" - "config is deprecated.\n " - "Use X86_ACPI_CPUFREQ (acpi-cpufreq instead.\n" ); + printk("speedstep-centrino with X86_SPEEDSTEP_CENTRINO_ACPI " + "config is deprecated.\n " + "Use X86_ACPI_CPUFREQ (acpi-cpufreq) instead.\n" ); return 0; diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index aef39be8136..300d9b38d02 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -227,7 +227,7 @@ static struct { atomic_t count_start; atomic_t count_stop; unsigned long long values[NR_CPUS]; -} tsc __initdata = { +} tsc __cpuinitdata = { .start_flag = ATOMIC_INIT(0), .count_start = ATOMIC_INIT(0), .count_stop = ATOMIC_INIT(0), @@ -332,7 +332,7 @@ static void __init synchronize_tsc_bp(void) printk("passed.\n"); } -static void __init synchronize_tsc_ap(void) +static void __cpuinit synchronize_tsc_ap(void) { int i; diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S index fcce0e61b0e..2f1814c5cfd 100644 --- a/arch/i386/kernel/trampoline.S +++ b/arch/i386/kernel/trampoline.S @@ -38,6 +38,11 @@ .data +/* We can free up trampoline after bootup if cpu hotplug is not supported. */ +#ifndef CONFIG_HOTPLUG_CPU +.section ".init.data","aw",@progbits +#endif + .code16 ENTRY(trampoline_data) diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index ef667245569..d4275537b25 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -480,13 +480,14 @@ config CALGARY_IOMMU config CALGARY_IOMMU_ENABLED_BY_DEFAULT bool "Should Calgary be enabled by default?" + default y depends on CALGARY_IOMMU help - Should Calgary be enabled by default? If you choose 'y', Calgary + Should Calgary be enabled by default? if you choose 'y', Calgary will be used (if it exists). If you choose 'n', Calgary will not be used even if it exists. If you choose 'n' and would like to use Calgary anyway, pass 'iommu=calgary' on the kernel command line. - If unsure, say N. + If unsure, say Y. # need this always selected by IOMMU for the VIA workaround config SWIOTLB diff --git a/arch/x86_64/kernel/cpufreq/Kconfig b/arch/x86_64/kernel/cpufreq/Kconfig index 3abcfa3e1ed..45a6a1fd14a 100644 --- a/arch/x86_64/kernel/cpufreq/Kconfig +++ b/arch/x86_64/kernel/cpufreq/Kconfig @@ -49,6 +49,7 @@ config X86_SPEEDSTEP_CENTRINO_ACPI config X86_ACPI_CPUFREQ tristate "ACPI Processor P-States driver" + select CPU_FREQ_TABLE depends on ACPI_PROCESSOR help This driver adds a CPUFreq driver which utilizes the ACPI diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 1d9eb6db732..09d2e8a10a4 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -319,7 +319,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, /* * This handles the process stack: */ - tinfo = current_thread_info(); + tinfo = task_thread_info(tsk); HANDLE_STACK (valid_stack_ptr(tinfo, stack)); #undef HANDLE_STACK put_cpu(); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4b4217d9be7..07b70624377 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -577,9 +577,9 @@ static int cfq_allow_merge(request_queue_t *q, struct request *rq, pid_t key; /* - * Disallow merge, if bio and rq aren't both sync or async + * Disallow merge of a sync bio into an async request. */ - if (!!bio_sync(bio) != !!rq_is_sync(rq)) + if ((bio_data_dir(bio) == READ || bio_sync(bio)) && !rq_is_sync(rq)) return 0; /* @@ -592,7 +592,7 @@ static int cfq_allow_merge(request_queue_t *q, struct request *rq, if (cfqq == RQ_CFQQ(rq)) return 1; - return 1; + return 0; } static inline void diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 9c52d87d6f0..4144d5dd442 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -424,7 +424,7 @@ static void acpi_ec_gpe_query(void *ec_cxt) snprintf(object_name, 8, "_Q%2.2X", value); - printk(KERN_INFO PREFIX "evaluating %s\n", object_name); + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Evaluating %s", object_name)); acpi_evaluate_object(ec->handle, object_name, NULL, NULL); } diff --git a/drivers/acpi/toshiba_acpi.c b/drivers/acpi/toshiba_acpi.c index 88aeccbafaa..d9b651ffcdc 100644 --- a/drivers/acpi/toshiba_acpi.c +++ b/drivers/acpi/toshiba_acpi.c @@ -321,13 +321,16 @@ static int set_lcd_status(struct backlight_device *bd) static unsigned long write_lcd(const char *buffer, unsigned long count) { int value; - int ret = count; + int ret; if (sscanf(buffer, " brightness : %i", &value) == 1 && - value >= 0 && value < HCI_LCD_BRIGHTNESS_LEVELS) + value >= 0 && value < HCI_LCD_BRIGHTNESS_LEVELS) { ret = set_lcd(value); - else + if (ret == 0) + ret = count; + } else { ret = -EINVAL; + } return ret; } diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index b34e0a958d0..da21552d2b1 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -381,7 +381,7 @@ config PATA_OPTI If unsure, say N. config PATA_OPTIDMA - tristate "OPTI FireStar PATA support (Veyr Experimental)" + tristate "OPTI FireStar PATA support (Very Experimental)" depends on PCI && EXPERIMENTAL help This option enables DMA/PIO support for the later OPTi diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 10ee22ae5c1..623cec914c9 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1027,13 +1027,15 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, #endif } - rc = pci_request_regions(pdev, DRV_NAME); - if (rc) { - disable_dev_on_err = 0; - goto err_out; - } - - if (legacy_mode) { + if (!legacy_mode) { + rc = pci_request_regions(pdev, DRV_NAME); + if (rc) { + disable_dev_on_err = 0; + goto err_out; + } + } else { + /* Deal with combined mode hack. This side of the logic all + goes away once the combined mode hack is killed in 2.6.21 */ if (!request_region(ATA_PRIMARY_CMD, 8, "libata")) { struct resource *conflict, res; res.start = ATA_PRIMARY_CMD; @@ -1071,6 +1073,13 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, } } else legacy_mode |= ATA_PORT_SECONDARY; + + if (legacy_mode & ATA_PORT_PRIMARY) + pci_request_region(pdev, 1, DRV_NAME); + if (legacy_mode & ATA_PORT_SECONDARY) + pci_request_region(pdev, 3, DRV_NAME); + /* If there is a DMA resource, allocate it */ + pci_request_region(pdev, 4, DRV_NAME); } /* we have legacy mode, but all ports are unavailable */ @@ -1114,11 +1123,20 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, err_out_ent: kfree(probe_ent); err_out_regions: - if (legacy_mode & ATA_PORT_PRIMARY) - release_region(ATA_PRIMARY_CMD, 8); - if (legacy_mode & ATA_PORT_SECONDARY) - release_region(ATA_SECONDARY_CMD, 8); - pci_release_regions(pdev); + /* All this conditional stuff is needed for the combined mode hack + until 2.6.21 when it can go */ + if (legacy_mode) { + pci_release_region(pdev, 4); + if (legacy_mode & ATA_PORT_PRIMARY) { + release_region(ATA_PRIMARY_CMD, 8); + pci_release_region(pdev, 1); + } + if (legacy_mode & ATA_PORT_SECONDARY) { + release_region(ATA_SECONDARY_CMD, 8); + pci_release_region(pdev, 3); + } + } else + pci_release_regions(pdev); err_out: if (disable_dev_on_err) pci_disable_device(pdev); diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c index 47082df7199..dfb306057cf 100644 --- a/drivers/ata/pata_hpt37x.c +++ b/drivers/ata/pata_hpt37x.c @@ -25,7 +25,7 @@ #include <linux/libata.h> #define DRV_NAME "pata_hpt37x" -#define DRV_VERSION "0.5.1" +#define DRV_VERSION "0.5.2" struct hpt_clock { u8 xfer_speed; @@ -416,7 +416,7 @@ static const char *bad_ata100_5[] = { static unsigned long hpt370_filter(const struct ata_port *ap, struct ata_device *adev, unsigned long mask) { - if (adev->class != ATA_DEV_ATA) { + if (adev->class == ATA_DEV_ATA) { if (hpt_dma_blacklisted(adev, "UDMA", bad_ata33)) mask &= ~ATA_MASK_UDMA; if (hpt_dma_blacklisted(adev, "UDMA100", bad_ata100_5)) @@ -749,7 +749,7 @@ static void hpt37x_bmdma_stop(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct pci_dev *pdev = to_pci_dev(ap->host->dev); - int mscreg = 0x50 + 2 * ap->port_no; + int mscreg = 0x50 + 4 * ap->port_no; u8 bwsr_stat, msc_stat; pci_read_config_byte(pdev, 0x6A, &bwsr_stat); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 7c95c762950..62462190e07 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -765,47 +765,34 @@ static inline struct bio *pkt_get_list_first(struct bio **list_head, struct bio */ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc) { - char sense[SCSI_SENSE_BUFFERSIZE]; - request_queue_t *q; + request_queue_t *q = bdev_get_queue(pd->bdev); struct request *rq; - DECLARE_COMPLETION_ONSTACK(wait); - int err = 0; + int ret = 0; - q = bdev_get_queue(pd->bdev); + rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? + WRITE : READ, __GFP_WAIT); + + if (cgc->buflen) { + if (blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, __GFP_WAIT)) + goto out; + } + + rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); + memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE); + if (sizeof(rq->cmd) > CDROM_PACKET_SIZE) + memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE); - rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? WRITE : READ, - __GFP_WAIT); - rq->errors = 0; - rq->rq_disk = pd->bdev->bd_disk; - rq->bio = NULL; - rq->buffer = NULL; rq->timeout = 60*HZ; - rq->data = cgc->buffer; - rq->data_len = cgc->buflen; - rq->sense = sense; - memset(sense, 0, sizeof(sense)); - rq->sense_len = 0; rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->cmd_flags |= REQ_HARDBARRIER; if (cgc->quiet) rq->cmd_flags |= REQ_QUIET; - memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE); - if (sizeof(rq->cmd) > CDROM_PACKET_SIZE) - memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE); - rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); - - rq->ref_count++; - rq->end_io_data = &wait; - rq->end_io = blk_end_sync_rq; - elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); - generic_unplug_device(q); - wait_for_completion(&wait); - - if (rq->errors) - err = -EIO; + blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0); + ret = rq->errors; +out: blk_put_request(rq); - return err; + return ret; } /* diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 66d028d3043..3105dddf59f 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -337,6 +337,12 @@ static const char *mrw_address_space[] = { "DMA", "GAA" }; /* used in the audio ioctls */ #define CHECKAUDIO if ((ret=check_for_audio_disc(cdi, cdo))) return ret +/* + * Another popular OS uses 7 seconds as the hard timeout for default + * commands, so it is a good choice for us as well. + */ +#define CDROM_DEF_TIMEOUT (7 * HZ) + /* Not-exported routines. */ static int open_for_data(struct cdrom_device_info * cdi); static int check_for_audio_disc(struct cdrom_device_info * cdi, @@ -1528,7 +1534,7 @@ void init_cdrom_command(struct packet_command *cgc, void *buf, int len, cgc->buffer = (char *) buf; cgc->buflen = len; cgc->data_direction = type; - cgc->timeout = 5*HZ; + cgc->timeout = CDROM_DEF_TIMEOUT; } /* DVD handling */ diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index 8b3317fd46c..1d59e2a5b9a 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -225,6 +225,10 @@ struct agp_bridge_data { #define I810_GMS_DISABLE 0x00000000 #define I810_PGETBL_CTL 0x2020 #define I810_PGETBL_ENABLED 0x00000001 +#define I965_PGETBL_SIZE_MASK 0x0000000e +#define I965_PGETBL_SIZE_512KB (0 << 1) +#define I965_PGETBL_SIZE_256KB (1 << 1) +#define I965_PGETBL_SIZE_128KB (2 << 1) #define I810_DRAM_CTL 0x3000 #define I810_DRAM_ROW_0 0x00000001 #define I810_DRAM_ROW_0_SDRAM 0x00000001 diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 2f2c4efff8a..979300405c0 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -650,6 +650,15 @@ static struct pci_device_id agp_amd64_pci_table[] = { .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }, + /* VIA K8M890 / K8N890 */ + { + .class = (PCI_CLASS_BRIDGE_HOST << 8), + .class_mask = ~0, + .vendor = PCI_VENDOR_ID_VIA, + .device = PCI_DEVICE_ID_VIA_K8M890CE, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + }, /* VIA K8T890 */ { .class = (PCI_CLASS_BRIDGE_HOST << 8), diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index 883a36a2783..3491d6f84bc 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -965,6 +965,9 @@ int agp_generic_insert_memory(struct agp_memory * mem, off_t pg_start, int type) if (!bridge) return -EINVAL; + if (mem->page_count == 0) + return 0; + temp = bridge->current_size; switch (bridge->driver->size_type) { @@ -1016,8 +1019,8 @@ int agp_generic_insert_memory(struct agp_memory * mem, off_t pg_start, int type) for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { writel(bridge->driver->mask_memory(bridge, mem->memory[i], mem->type), bridge->gatt_table+j); - readl(bridge->gatt_table+j); /* PCI Posting. */ } + readl(bridge->gatt_table+j-1); /* PCI Posting. */ bridge->driver->tlb_flush(mem); return 0; @@ -1034,6 +1037,9 @@ int agp_generic_remove_memory(struct agp_memory *mem, off_t pg_start, int type) if (!bridge) return -EINVAL; + if (mem->page_count == 0) + return 0; + if (type != 0 || mem->type != 0) { /* The generic routines know nothing of memory types */ return -EINVAL; @@ -1042,10 +1048,9 @@ int agp_generic_remove_memory(struct agp_memory *mem, off_t pg_start, int type) /* AK: bogus, should encode addresses > 4GB */ for (i = pg_start; i < (mem->page_count + pg_start); i++) { writel(bridge->scratch_page, bridge->gatt_table+i); - readl(bridge->gatt_table+i); /* PCI Posting. */ } + readl(bridge->gatt_table+i-1); /* PCI Posting. */ - global_cache_flush(); bridge->driver->tlb_flush(mem); return 0; } diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 555b3a8ab49..ab0a9c0ad7c 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -207,6 +207,9 @@ static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start, int i, j, num_entries; void *temp; + if (mem->page_count == 0) + return 0; + temp = agp_bridge->current_size; num_entries = A_SIZE_FIX(temp)->num_entries; @@ -221,12 +224,16 @@ static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start, if (type != 0 || mem->type != 0) { if ((type == AGP_DCACHE_MEMORY) && (mem->type == AGP_DCACHE_MEMORY)) { /* special insert */ - global_cache_flush(); + if (!mem->is_flushed) { + global_cache_flush(); + mem->is_flushed = TRUE; + } + for (i = pg_start; i < (pg_start + mem->page_count); i++) { writel((i*4096)|I810_PTE_LOCAL|I810_PTE_VALID, intel_i810_private.registers+I810_PTE_BASE+(i*4)); - readl(intel_i810_private.registers+I810_PTE_BASE+(i*4)); /* PCI Posting. */ } - global_cache_flush(); + readl(intel_i810_private.registers+I810_PTE_BASE+((i-1)*4)); /* PCI Posting. */ + agp_bridge->driver->tlb_flush(mem); return 0; } @@ -236,14 +243,17 @@ static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start, } insert: - global_cache_flush(); + if (!mem->is_flushed) { + global_cache_flush(); + mem->is_flushed = TRUE; + } + for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { writel(agp_bridge->driver->mask_memory(agp_bridge, mem->memory[i], mem->type), intel_i810_private.registers+I810_PTE_BASE+(j*4)); - readl(intel_i810_private.registers+I810_PTE_BASE+(j*4)); /* PCI Posting. */ } - global_cache_flush(); + readl(intel_i810_private.registers+I810_PTE_BASE+((j-1)*4)); /* PCI Posting. */ agp_bridge->driver->tlb_flush(mem); return 0; @@ -254,12 +264,14 @@ static int intel_i810_remove_entries(struct agp_memory *mem, off_t pg_start, { int i; + if (mem->page_count == 0) + return 0; + for (i = pg_start; i < (mem->page_count + pg_start); i++) { writel(agp_bridge->scratch_page, intel_i810_private.registers+I810_PTE_BASE+(i*4)); - readl(intel_i810_private.registers+I810_PTE_BASE+(i*4)); /* PCI Posting. */ } + readl(intel_i810_private.registers+I810_PTE_BASE+((i-1)*4)); - global_cache_flush(); agp_bridge->driver->tlb_flush(mem); return 0; } @@ -370,6 +382,11 @@ static struct _intel_i830_private { struct pci_dev *i830_dev; /* device one */ volatile u8 __iomem *registers; volatile u32 __iomem *gtt; /* I915G */ + /* gtt_entries is the number of gtt entries that are already mapped + * to stolen memory. Stolen memory is larger than the memory mapped + * through gtt_entries, as it includes some reserved space for the BIOS + * popup and for the GTT. + */ int gtt_entries; } intel_i830_private; @@ -380,14 +397,41 @@ static void intel_i830_init_gtt_entries(void) u8 rdct; int local = 0; static const int ddt[4] = { 0, 16, 32, 64 }; - int size; + int size; /* reserved space (in kb) at the top of stolen memory */ pci_read_config_word(agp_bridge->dev,I830_GMCH_CTRL,&gmch_ctrl); - /* We obtain the size of the GTT, which is also stored (for some - * reason) at the top of stolen memory. Then we add 4KB to that - * for the video BIOS popup, which is also stored in there. */ - size = agp_bridge->driver->fetch_size() + 4; + if (IS_I965) { + u32 pgetbl_ctl; + + pci_read_config_dword(agp_bridge->dev, I810_PGETBL_CTL, + &pgetbl_ctl); + /* The 965 has a field telling us the size of the GTT, + * which may be larger than what is necessary to map the + * aperture. + */ + switch (pgetbl_ctl & I965_PGETBL_SIZE_MASK) { + case I965_PGETBL_SIZE_128KB: + size = 128; + break; + case I965_PGETBL_SIZE_256KB: + size = 256; + break; + case I965_PGETBL_SIZE_512KB: + size = 512; + break; + default: + printk(KERN_INFO PFX "Unknown page table size, " + "assuming 512KB\n"); + size = 512; + } + size += 4; /* add in BIOS popup space */ + } else { + /* On previous hardware, the GTT size was just what was + * required to map the aperture. + */ + size = agp_bridge->driver->fetch_size() + 4; + } if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82830_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82845G_HB) { @@ -576,6 +620,9 @@ static int intel_i830_insert_entries(struct agp_memory *mem,off_t pg_start, int int i,j,num_entries; void *temp; + if (mem->page_count == 0) + return 0; + temp = agp_bridge->current_size; num_entries = A_SIZE_FIX(temp)->num_entries; @@ -598,16 +645,18 @@ static int intel_i830_insert_entries(struct agp_memory *mem,off_t pg_start, int (mem->type != 0 && mem->type != AGP_PHYS_MEMORY)) return -EINVAL; - global_cache_flush(); /* FIXME: Necessary ?*/ + if (!mem->is_flushed) { + global_cache_flush(); + mem->is_flushed = TRUE; + } for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { writel(agp_bridge->driver->mask_memory(agp_bridge, mem->memory[i], mem->type), intel_i830_private.registers+I810_PTE_BASE+(j*4)); - readl(intel_i830_private.registers+I810_PTE_BASE+(j*4)); /* PCI Posting. */ } + readl(intel_i830_private.registers+I810_PTE_BASE+((j-1)*4)); - global_cache_flush(); agp_bridge->driver->tlb_flush(mem); return 0; } @@ -617,7 +666,8 @@ static int intel_i830_remove_entries(struct agp_memory *mem,off_t pg_start, { int i; - global_cache_flush(); + if (mem->page_count == 0) + return 0; if (pg_start < intel_i830_private.gtt_entries) { printk (KERN_INFO PFX "Trying to disable local/stolen memory\n"); @@ -626,10 +676,9 @@ static int intel_i830_remove_entries(struct agp_memory *mem,off_t pg_start, for (i = pg_start; i < (mem->page_count + pg_start); i++) { writel(agp_bridge->scratch_page, intel_i830_private.registers+I810_PTE_BASE+(i*4)); - readl(intel_i830_private.registers+I810_PTE_BASE+(i*4)); /* PCI Posting. */ } + readl(intel_i830_private.registers+I810_PTE_BASE+((i-1)*4)); - global_cache_flush(); agp_bridge->driver->tlb_flush(mem); return 0; } @@ -686,6 +735,9 @@ static int intel_i915_insert_entries(struct agp_memory *mem,off_t pg_start, int i,j,num_entries; void *temp; + if (mem->page_count == 0) + return 0; + temp = agp_bridge->current_size; num_entries = A_SIZE_FIX(temp)->num_entries; @@ -708,15 +760,17 @@ static int intel_i915_insert_entries(struct agp_memory *mem,off_t pg_start, (mem->type != 0 && mem->type != AGP_PHYS_MEMORY)) return -EINVAL; - global_cache_flush(); + if (!mem->is_flushed) { + global_cache_flush(); + mem->is_flushed = TRUE; + } for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { writel(agp_bridge->driver->mask_memory(agp_bridge, mem->memory[i], mem->type), intel_i830_private.gtt+j); - readl(intel_i830_private.gtt+j); /* PCI Posting. */ } + readl(intel_i830_private.gtt+j-1); - global_cache_flush(); agp_bridge->driver->tlb_flush(mem); return 0; } @@ -726,7 +780,8 @@ static int intel_i915_remove_entries(struct agp_memory *mem,off_t pg_start, { int i; - global_cache_flush(); + if (mem->page_count == 0) + return 0; if (pg_start < intel_i830_private.gtt_entries) { printk (KERN_INFO PFX "Trying to disable local/stolen memory\n"); @@ -735,30 +790,34 @@ static int intel_i915_remove_entries(struct agp_memory *mem,off_t pg_start, for (i = pg_start; i < (mem->page_count + pg_start); i++) { writel(agp_bridge->scratch_page, intel_i830_private.gtt+i); - readl(intel_i830_private.gtt+i); } + readl(intel_i830_private.gtt+i-1); - global_cache_flush(); agp_bridge->driver->tlb_flush(mem); return 0; } -static int intel_i915_fetch_size(void) +/* Return the aperture size by just checking the resource length. The effect + * described in the spec of the MSAC registers is just changing of the + * resource size. + */ +static int intel_i9xx_fetch_size(void) { - struct aper_size_info_fixed *values; - u32 temp, offset; + int num_sizes = sizeof(intel_i830_sizes) / sizeof(*intel_i830_sizes); + int aper_size; /* size in megabytes */ + int i; -#define I915_256MB_ADDRESS_MASK (1<<27) + aper_size = pci_resource_len(intel_i830_private.i830_dev, 2) / MB(1); - values = A_SIZE_FIX(agp_bridge->driver->aperture_sizes); + for (i = 0; i < num_sizes; i++) { + if (aper_size == intel_i830_sizes[i].size) { + agp_bridge->current_size = intel_i830_sizes + i; + agp_bridge->previous_size = agp_bridge->current_size; + return aper_size; + } + } - pci_read_config_dword(intel_i830_private.i830_dev, I915_GMADDR, &temp); - if (temp & I915_256MB_ADDRESS_MASK) - offset = 0; /* 128MB aperture */ - else - offset = 2; /* 256MB aperture */ - agp_bridge->previous_size = agp_bridge->current_size = (void *)(values + offset); - return values[offset].size; + return 0; } /* The intel i915 automatically initializes the agp aperture during POST. @@ -821,40 +880,9 @@ static unsigned long intel_i965_mask_memory(struct agp_bridge_data *bridge, return addr | bridge->driver->masks[type].mask; } -static int intel_i965_fetch_size(void) -{ - struct aper_size_info_fixed *values; - u32 offset = 0; - u8 temp; - -#define I965_512MB_ADDRESS_MASK (3<<1) - - values = A_SIZE_FIX(agp_bridge->driver->aperture_sizes); - - pci_read_config_byte(intel_i830_private.i830_dev, I965_MSAC, &temp); - temp &= I965_512MB_ADDRESS_MASK; - switch (temp) { - case 0x00: - offset = 0; /* 128MB */ - break; - case 0x06: - offset = 3; /* 512MB */ - break; - default: - case 0x02: - offset = 2; /* 256MB */ - break; - } - - agp_bridge->previous_size = agp_bridge->current_size = (void *)(values + offset); - - /* The i965 GTT is always sized as if it had a 512kB aperture size */ - return 512; -} - /* The intel i965 automatically initializes the agp aperture during POST. -+ * Use the memory already set aside for in the GTT. -+ */ + * Use the memory already set aside for in the GTT. + */ static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge) { int page_order; @@ -1574,7 +1602,7 @@ static struct agp_bridge_driver intel_915_driver = { .num_aperture_sizes = 4, .needs_scratch_page = TRUE, .configure = intel_i915_configure, - .fetch_size = intel_i915_fetch_size, + .fetch_size = intel_i9xx_fetch_size, .cleanup = intel_i915_cleanup, .tlb_flush = intel_i810_tlbflush, .mask_memory = intel_i810_mask_memory, @@ -1598,7 +1626,7 @@ static struct agp_bridge_driver intel_i965_driver = { .num_aperture_sizes = 4, .needs_scratch_page = TRUE, .configure = intel_i915_configure, - .fetch_size = intel_i965_fetch_size, + .fetch_size = intel_i9xx_fetch_size, .cleanup = intel_i915_cleanup, .tlb_flush = intel_i810_tlbflush, .mask_memory = intel_i965_mask_memory, diff --git a/drivers/char/agp/sgi-agp.c b/drivers/char/agp/sgi-agp.c index d73be4c2db8..902648db7ef 100644 --- a/drivers/char/agp/sgi-agp.c +++ b/drivers/char/agp/sgi-agp.c @@ -281,10 +281,11 @@ static int __devinit agp_sgi_init(void) else return 0; - sgi_tioca_agp_bridges = - (struct agp_bridge_data **)kmalloc(tioca_gart_found * - sizeof(struct agp_bridge_data *), - GFP_KERNEL); + sgi_tioca_agp_bridges = kmalloc(tioca_gart_found * + sizeof(struct agp_bridge_data *), + GFP_KERNEL); + if (!sgi_tioca_agp_bridges) + return -ENOMEM; j = 0; list_for_each_entry(info, &tioca_list, ca_list) { diff --git a/drivers/char/ip2/i2ellis.h b/drivers/char/ip2/i2ellis.h index 5eabe47b0bc..433305062fb 100644 --- a/drivers/char/ip2/i2ellis.h +++ b/drivers/char/ip2/i2ellis.h @@ -606,9 +606,9 @@ static int iiDownloadAll(i2eBordStrPtr, loadHdrStrPtr, int, int); // code and returning. // #define COMPLETE(pB,code) \ - if(1){ \ + do { \ pB->i2eError = code; \ return (code == I2EE_GOOD);\ - } + } while (0) #endif // I2ELLIS_H diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 3ece6923134..5c9f67f98d1 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -28,6 +28,7 @@ #include <linux/init.h> #include <linux/connector.h> #include <asm/atomic.h> +#include <asm/unaligned.h> #include <linux/cn_proc.h> @@ -60,7 +61,7 @@ void proc_fork_connector(struct task_struct *task) ev = (struct proc_event*)msg->data; get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - ev->timestamp_ns = timespec_to_ns(&ts); + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ev->what = PROC_EVENT_FORK; ev->event_data.fork.parent_pid = task->real_parent->pid; ev->event_data.fork.parent_tgid = task->real_parent->tgid; @@ -88,7 +89,7 @@ void proc_exec_connector(struct task_struct *task) ev = (struct proc_event*)msg->data; get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - ev->timestamp_ns = timespec_to_ns(&ts); + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ev->what = PROC_EVENT_EXEC; ev->event_data.exec.process_pid = task->pid; ev->event_data.exec.process_tgid = task->tgid; @@ -124,7 +125,7 @@ void proc_id_connector(struct task_struct *task, int which_id) return; get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - ev->timestamp_ns = timespec_to_ns(&ts); + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ @@ -146,7 +147,7 @@ void proc_exit_connector(struct task_struct *task) ev = (struct proc_event*)msg->data; get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - ev->timestamp_ns = timespec_to_ns(&ts); + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ev->what = PROC_EVENT_EXIT; ev->event_data.exit.process_pid = task->pid; ev->event_data.exit.process_tgid = task->tgid; @@ -181,7 +182,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) ev = (struct proc_event*)msg->data; msg->seq = rcvd_seq; ktime_get_ts(&ts); /* get high res monotonic timestamp */ - ev->timestamp_ns = timespec_to_ns(&ts); + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ev->cpu = -1; ev->what = PROC_EVENT_NONE; ev->event_data.ack.err = err; diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 6742b1adf2c..91ad342a605 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -285,6 +285,7 @@ cpufreq_stat_notifier_trans (struct notifier_block *nb, unsigned long val, stat = cpufreq_stats_table[freq->cpu]; if (!stat) return 0; + old_index = freq_table_get_index(stat, freq->old); new_index = freq_table_get_index(stat, freq->new); @@ -292,6 +293,9 @@ cpufreq_stat_notifier_trans (struct notifier_block *nb, unsigned long val, if (old_index == new_index) return 0; + if (old_index == -1 || new_index == -1) + return 0; + spin_lock(&cpufreq_stats_lock); stat->last_index = new_index; #ifdef CONFIG_CPU_FREQ_STAT_DETAILS diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 96d4a0bb220..ec796ad087d 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -6,13 +6,21 @@ menu "HID Devices" config HID tristate "Generic HID support" + depends on INPUT default y ---help--- - Say Y here if you want generic HID support to connect keyboards, - mice, joysticks, graphic tablets, or any other HID based devices - to your computer. You also need to select particular types of - HID devices you want to compile support for, in the particular - driver menu (USB, Bluetooth) + A human interface device (HID) is a type of computer device that + interacts directly with and takes input from humans. The term "HID" + most commonly used to refer to the USB-HID specification, but other + devices (such as, but not strictly limited to, Bluetooth) are + designed using HID specification (this involves certain keyboards, + mice, tablets, etc). This option compiles into kernel the generic + HID layer code (parser, usages, etc.), which can then be used by + transport-specific HID implementation (like USB or Bluetooth). + + For docs and specs, see http://www.usb.org/developers/hidpage/ + + If unsure, say Y endmenu diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index e1989f3a268..9367c4cfe93 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -564,13 +564,4 @@ config I2C_PNX This driver can also be built as a module. If so, the module will be called i2c-pnx. -config I2C_PNX_EARLY - bool "Early initialization for I2C on PNXxxxx" - depends on I2C_PNX=y - help - Under certain circumstances one may need to make sure I2C on PNXxxxx - is initialized earlier than some other driver that depends on it - (for instance, that might be USB in case of PNX4008). With this - option turned on you can guarantee that. - endmenu diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index bbc8e3a7ff5..490173611d6 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -529,6 +529,8 @@ mv64xxx_i2c_probe(struct platform_device *pd) platform_set_drvdata(pd, drv_data); i2c_set_adapdata(&drv_data->adapter, drv_data); + mv64xxx_i2c_hw_init(drv_data); + if (request_irq(drv_data->irq, mv64xxx_i2c_intr, 0, MV64XXX_I2C_CTLR_NAME, drv_data)) { dev_err(&drv_data->adapter.dev, @@ -542,8 +544,6 @@ mv64xxx_i2c_probe(struct platform_device *pd) goto exit_free_irq; } - mv64xxx_i2c_hw_init(drv_data); - return 0; exit_free_irq: diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index de0bca77e92..17376feb1ac 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -305,8 +305,7 @@ static int i2c_pnx_master_rcv(struct i2c_adapter *adap) return 0; } -static irqreturn_t -i2c_pnx_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id) { u32 stat, ctl; struct i2c_adapter *adap = dev_id; @@ -699,10 +698,6 @@ MODULE_AUTHOR("Vitaly Wool, Dennis Kovalev <source@mvista.com>"); MODULE_DESCRIPTION("I2C driver for Philips IP3204-based I2C busses"); MODULE_LICENSE("GPL"); -#ifdef CONFIG_I2C_PNX_EARLY /* We need to make sure I2C is initialized before USB */ subsys_initcall(i2c_adap_pnx_init); -#else -mudule_init(i2c_adap_pnx_init); -#endif module_exit(i2c_adap_pnx_exit); diff --git a/drivers/i2c/chips/m41t00.c b/drivers/i2c/chips/m41t00.c index 420377c8642..3fcb646e207 100644 --- a/drivers/i2c/chips/m41t00.c +++ b/drivers/i2c/chips/m41t00.c @@ -209,6 +209,7 @@ m41t00_set(void *arg) buf[m41t00_chip->hour] = (buf[m41t00_chip->hour] & ~0x3f) | (hour& 0x3f); buf[m41t00_chip->day] = (buf[m41t00_chip->day] & ~0x3f) | (day & 0x3f); buf[m41t00_chip->mon] = (buf[m41t00_chip->mon] & ~0x1f) | (mon & 0x1f); + buf[m41t00_chip->year] = year; if (i2c_master_send(save_client, wbuf, 9) < 0) dev_err(&save_client->dev, "m41t00_set: Write error\n"); diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 3e31f1d265c..b05378a3d67 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -95,16 +95,32 @@ struct device_driver i2c_adapter_driver = { .bus = &i2c_bus_type, }; +/* ------------------------------------------------------------------------- */ + +/* I2C bus adapters -- one roots each I2C or SMBUS segment */ + static void i2c_adapter_class_dev_release(struct class_device *dev) { struct i2c_adapter *adap = class_dev_to_i2c_adapter(dev); complete(&adap->class_dev_released); } +static ssize_t i2c_adapter_show_name(struct class_device *cdev, char *buf) +{ + struct i2c_adapter *adap = class_dev_to_i2c_adapter(cdev); + return sprintf(buf, "%s\n", adap->name); +} + +static struct class_device_attribute i2c_adapter_attrs[] = { + __ATTR(name, S_IRUGO, i2c_adapter_show_name, NULL), + { }, +}; + struct class i2c_adapter_class = { - .owner = THIS_MODULE, - .name = "i2c-adapter", - .release = &i2c_adapter_class_dev_release, + .owner = THIS_MODULE, + .name = "i2c-adapter", + .class_dev_attrs = i2c_adapter_attrs, + .release = &i2c_adapter_class_dev_release, }; static ssize_t show_adapter_name(struct device *dev, struct device_attribute *attr, char *buf) @@ -175,8 +191,12 @@ int i2c_add_adapter(struct i2c_adapter *adap) * If the parent pointer is not set up, * we add this adapter to the host bus. */ - if (adap->dev.parent == NULL) + if (adap->dev.parent == NULL) { adap->dev.parent = &platform_bus; + printk(KERN_WARNING "**WARNING** I2C adapter driver [%s] " + "forgot to specify physical device; fix it!\n", + adap->name); + } sprintf(adap->dev.bus_id, "i2c-%d", adap->nr); adap->dev.driver = &i2c_adapter_driver; adap->dev.release = &i2c_adapter_dev_release; diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c index ffdffb6379e..524e65de439 100644 --- a/drivers/ide/pci/atiixp.c +++ b/drivers/ide/pci/atiixp.c @@ -46,6 +46,8 @@ static atiixp_ide_timing mdma_timing[] = { static int save_mdma_mode[4]; +static DEFINE_SPINLOCK(atiixp_lock); + /** * atiixp_ratemask - compute rate mask for ATIIXP IDE * @drive: IDE drive to compute for @@ -105,7 +107,7 @@ static int atiixp_ide_dma_host_on(ide_drive_t *drive) unsigned long flags; u16 tmp16; - spin_lock_irqsave(&ide_lock, flags); + spin_lock_irqsave(&atiixp_lock, flags); pci_read_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, &tmp16); if (save_mdma_mode[drive->dn]) @@ -114,7 +116,7 @@ static int atiixp_ide_dma_host_on(ide_drive_t *drive) tmp16 |= (1 << drive->dn); pci_write_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, tmp16); - spin_unlock_irqrestore(&ide_lock, flags); + spin_unlock_irqrestore(&atiixp_lock, flags); return __ide_dma_host_on(drive); } @@ -125,13 +127,13 @@ static int atiixp_ide_dma_host_off(ide_drive_t *drive) unsigned long flags; u16 tmp16; - spin_lock_irqsave(&ide_lock, flags); + spin_lock_irqsave(&atiixp_lock, flags); pci_read_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, &tmp16); tmp16 &= ~(1 << drive->dn); pci_write_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, tmp16); - spin_unlock_irqrestore(&ide_lock, flags); + spin_unlock_irqrestore(&atiixp_lock, flags); return __ide_dma_host_off(drive); } @@ -152,7 +154,7 @@ static void atiixp_tuneproc(ide_drive_t *drive, u8 pio) u32 pio_timing_data; u16 pio_mode_data; - spin_lock_irqsave(&ide_lock, flags); + spin_lock_irqsave(&atiixp_lock, flags); pci_read_config_word(dev, ATIIXP_IDE_PIO_MODE, &pio_mode_data); pio_mode_data &= ~(0x07 << (drive->dn * 4)); @@ -165,7 +167,7 @@ static void atiixp_tuneproc(ide_drive_t *drive, u8 pio) (pio_timing[pio].command_width << (timing_shift + 4)); pci_write_config_dword(dev, ATIIXP_IDE_PIO_TIMING, pio_timing_data); - spin_unlock_irqrestore(&ide_lock, flags); + spin_unlock_irqrestore(&atiixp_lock, flags); } /** @@ -189,7 +191,7 @@ static int atiixp_speedproc(ide_drive_t *drive, u8 xferspeed) speed = ide_rate_filter(atiixp_ratemask(drive), xferspeed); - spin_lock_irqsave(&ide_lock, flags); + spin_lock_irqsave(&atiixp_lock, flags); save_mdma_mode[drive->dn] = 0; if (speed >= XFER_UDMA_0) { @@ -208,7 +210,7 @@ static int atiixp_speedproc(ide_drive_t *drive, u8 xferspeed) } } - spin_unlock_irqrestore(&ide_lock, flags); + spin_unlock_irqrestore(&atiixp_lock, flags); if (speed >= XFER_SW_DMA_0) pio = atiixp_dma_2_pio(speed); diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c index 61f1a9665a7..381cc6f101c 100644 --- a/drivers/ide/pci/via82cxxx.c +++ b/drivers/ide/pci/via82cxxx.c @@ -123,7 +123,7 @@ struct via82cxxx_dev static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing) { struct pci_dev *dev = hwif->pci_dev; - struct via82cxxx_dev *vdev = ide_get_hwifdata(hwif); + struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev); u8 t; if (~vdev->via_config->flags & VIA_BAD_AST) { @@ -162,7 +162,7 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing) static int via_set_drive(ide_drive_t *drive, u8 speed) { ide_drive_t *peer = HWIF(drive)->drives + (~drive->dn & 1); - struct via82cxxx_dev *vdev = ide_get_hwifdata(drive->hwif); + struct via82cxxx_dev *vdev = pci_get_drvdata(drive->hwif->pci_dev); struct ide_timing t, p; unsigned int T, UT; @@ -225,7 +225,7 @@ static void via82cxxx_tune_drive(ide_drive_t *drive, u8 pio) static int via82cxxx_ide_dma_check (ide_drive_t *drive) { ide_hwif_t *hwif = HWIF(drive); - struct via82cxxx_dev *vdev = ide_get_hwifdata(hwif); + struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev); u16 w80 = hwif->udma_four; u16 speed = ide_find_best_mode(drive, @@ -262,6 +262,53 @@ static struct via_isa_bridge *via_config_find(struct pci_dev **isa) return via_config; } +/* + * Check and handle 80-wire cable presence + */ +static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u) +{ + int i; + + switch (vdev->via_config->flags & VIA_UDMA) { + case VIA_UDMA_66: + for (i = 24; i >= 0; i -= 8) + if (((u >> (i & 16)) & 8) && + ((u >> i) & 0x20) && + (((u >> i) & 7) < 2)) { + /* + * 2x PCI clock and + * UDMA w/ < 3T/cycle + */ + vdev->via_80w |= (1 << (1 - (i >> 4))); + } + break; + + case VIA_UDMA_100: + for (i = 24; i >= 0; i -= 8) + if (((u >> i) & 0x10) || + (((u >> i) & 0x20) && + (((u >> i) & 7) < 4))) { + /* BIOS 80-wire bit or + * UDMA w/ < 60ns/cycle + */ + vdev->via_80w |= (1 << (1 - (i >> 4))); + } + break; + + case VIA_UDMA_133: + for (i = 24; i >= 0; i -= 8) + if (((u >> i) & 0x10) || + (((u >> i) & 0x20) && + (((u >> i) & 7) < 6))) { + /* BIOS 80-wire bit or + * UDMA w/ < 60ns/cycle + */ + vdev->via_80w |= (1 << (1 - (i >> 4))); + } + break; + } +} + /** * init_chipset_via82cxxx - initialization handler * @dev: PCI device @@ -274,14 +321,22 @@ static struct via_isa_bridge *via_config_find(struct pci_dev **isa) static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const char *name) { struct pci_dev *isa = NULL; + struct via82cxxx_dev *vdev; struct via_isa_bridge *via_config; u8 t, v; - unsigned int u; + u32 u; + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) { + printk(KERN_ERR "VP_IDE: out of memory :(\n"); + return -ENOMEM; + } + pci_set_drvdata(dev, vdev); /* * Find the ISA bridge to see how good the IDE is. */ - via_config = via_config_find(&isa); + vdev->via_config = via_config = via_config_find(&isa); /* We checked this earlier so if it fails here deeep badness is involved */ @@ -289,16 +344,17 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const BUG_ON(!via_config->id); /* - * Setup or disable Clk66 if appropriate + * Detect cable and configure Clk66 */ + pci_read_config_dword(dev, VIA_UDMA_TIMING, &u); + + via_cable_detect(vdev, u); if ((via_config->flags & VIA_UDMA) == VIA_UDMA_66) { /* Enable Clk66 */ - pci_read_config_dword(dev, VIA_UDMA_TIMING, &u); pci_write_config_dword(dev, VIA_UDMA_TIMING, u|0x80008); } else if (via_config->flags & VIA_BAD_CLK66) { /* Would cause trouble on 596a and 686 */ - pci_read_config_dword(dev, VIA_UDMA_TIMING, &u); pci_write_config_dword(dev, VIA_UDMA_TIMING, u & ~0x80008); } @@ -367,75 +423,11 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const return 0; } -/* - * Check and handle 80-wire cable presence - */ -static void __devinit via_cable_detect(struct pci_dev *dev, struct via82cxxx_dev *vdev) -{ - unsigned int u; - int i; - pci_read_config_dword(dev, VIA_UDMA_TIMING, &u); - - switch (vdev->via_config->flags & VIA_UDMA) { - - case VIA_UDMA_66: - for (i = 24; i >= 0; i -= 8) - if (((u >> (i & 16)) & 8) && - ((u >> i) & 0x20) && - (((u >> i) & 7) < 2)) { - /* - * 2x PCI clock and - * UDMA w/ < 3T/cycle - */ - vdev->via_80w |= (1 << (1 - (i >> 4))); - } - break; - - case VIA_UDMA_100: - for (i = 24; i >= 0; i -= 8) - if (((u >> i) & 0x10) || - (((u >> i) & 0x20) && - (((u >> i) & 7) < 4))) { - /* BIOS 80-wire bit or - * UDMA w/ < 60ns/cycle - */ - vdev->via_80w |= (1 << (1 - (i >> 4))); - } - break; - - case VIA_UDMA_133: - for (i = 24; i >= 0; i -= 8) - if (((u >> i) & 0x10) || - (((u >> i) & 0x20) && - (((u >> i) & 7) < 6))) { - /* BIOS 80-wire bit or - * UDMA w/ < 60ns/cycle - */ - vdev->via_80w |= (1 << (1 - (i >> 4))); - } - break; - - } -} - static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif) { - struct via82cxxx_dev *vdev = kmalloc(sizeof(struct via82cxxx_dev), - GFP_KERNEL); - struct pci_dev *isa = NULL; + struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev); int i; - if (vdev == NULL) { - printk(KERN_ERR "VP_IDE: out of memory :(\n"); - return; - } - - memset(vdev, 0, sizeof(struct via82cxxx_dev)); - ide_set_hwifdata(hwif, vdev); - - vdev->via_config = via_config_find(&isa); - via_cable_detect(hwif->pci_dev, vdev); - hwif->autodma = 0; hwif->tuneproc = &via82cxxx_tune_drive; diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 100df6f38d9..91e0c75aca8 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -52,6 +52,8 @@ #define KVM_MAX_VCPUS 1 #define KVM_MEMORY_SLOTS 4 #define KVM_NUM_MMU_PAGES 256 +#define KVM_MIN_FREE_MMU_PAGES 5 +#define KVM_REFILL_PAGES 25 #define FX_IMAGE_SIZE 512 #define FX_IMAGE_ALIGN 16 @@ -89,14 +91,54 @@ typedef unsigned long hva_t; typedef u64 hpa_t; typedef unsigned long hfn_t; +#define NR_PTE_CHAIN_ENTRIES 5 + +struct kvm_pte_chain { + u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES]; + struct hlist_node link; +}; + +/* + * kvm_mmu_page_role, below, is defined as: + * + * bits 0:3 - total guest paging levels (2-4, or zero for real mode) + * bits 4:7 - page table level for this shadow (1-4) + * bits 8:9 - page table quadrant for 2-level guests + * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) + */ +union kvm_mmu_page_role { + unsigned word; + struct { + unsigned glevels : 4; + unsigned level : 4; + unsigned quadrant : 2; + unsigned pad_for_nice_hex_output : 6; + unsigned metaphysical : 1; + }; +}; + struct kvm_mmu_page { struct list_head link; + struct hlist_node hash_link; + + /* + * The following two entries are used to key the shadow page in the + * hash table. + */ + gfn_t gfn; + union kvm_mmu_page_role role; + hpa_t page_hpa; unsigned long slot_bitmap; /* One bit set per slot which has memory * in this shadow page. */ int global; /* Set if all ptes in this page are global */ - u64 *parent_pte; + int multimapped; /* More than one parent_pte? */ + int root_count; /* Currently serving as active root */ + union { + u64 *parent_pte; /* !multimapped */ + struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ + }; }; struct vmcs { @@ -117,14 +159,26 @@ struct kvm_vcpu; struct kvm_mmu { void (*new_cr3)(struct kvm_vcpu *vcpu); int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); - void (*inval_page)(struct kvm_vcpu *vcpu, gva_t gva); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); hpa_t root_hpa; int root_level; int shadow_root_level; + + u64 *pae_root; +}; + +#define KVM_NR_MEM_OBJS 20 + +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; }; +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ struct kvm_guest_debug { int enabled; unsigned long bp[4]; @@ -173,6 +227,7 @@ struct kvm_vcpu { struct mutex mutex; int cpu; int launched; + int interrupt_window_open; unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) unsigned long irq_pending[NR_IRQ_WORDS]; @@ -184,6 +239,7 @@ struct kvm_vcpu { unsigned long cr3; unsigned long cr4; unsigned long cr8; + u64 pdptrs[4]; /* pae */ u64 shadow_efer; u64 apic_base; int nmsrs; @@ -194,6 +250,12 @@ struct kvm_vcpu { struct kvm_mmu_page page_header_buf[KVM_NUM_MMU_PAGES]; struct kvm_mmu mmu; + struct kvm_mmu_memory_cache mmu_pte_chain_cache; + struct kvm_mmu_memory_cache mmu_rmap_desc_cache; + + gfn_t last_pt_write_gfn; + int last_pt_write_count; + struct kvm_guest_debug guest_debug; char fx_buf[FX_BUF_SIZE]; @@ -231,10 +293,16 @@ struct kvm { spinlock_t lock; /* protects everything except vcpus */ int nmemslots; struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS]; + /* + * Hash table of struct kvm_mmu_page. + */ struct list_head active_mmu_pages; + int n_free_mmu_pages; + struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; int memory_config_version; int busy; + unsigned long rmap_overflow; }; struct kvm_stat { @@ -247,6 +315,9 @@ struct kvm_stat { u32 io_exits; u32 mmio_exits; u32 signal_exits; + u32 irq_window_exits; + u32 halt_exits; + u32 request_irq_exits; u32 irq_exits; }; @@ -279,6 +350,7 @@ struct kvm_arch_ops { void (*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); + void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu, unsigned long cr0); @@ -323,7 +395,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); -void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); +void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot); hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) @@ -396,6 +468,19 @@ int kvm_write_guest(struct kvm_vcpu *vcpu, unsigned long segment_base(u16 selector); +void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes); +void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes); +int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); +void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); + +static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, + u32 error_code) +{ + if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) + kvm_mmu_free_some_pages(vcpu); + return vcpu->mmu.page_fault(vcpu, gva, error_code); +} + static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); @@ -541,19 +626,4 @@ static inline u32 get_rdx_init_val(void) #define TSS_REDIRECTION_SIZE (256 / 8) #define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) -#ifdef CONFIG_X86_64 - -/* - * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. Therefore - * we need to allocate shadow page tables in the first 4GB of memory, which - * happens to fit the DMA32 zone. - */ -#define GFP_KVM_MMU (GFP_KERNEL | __GFP_DMA32) - -#else - -#define GFP_KVM_MMU GFP_KERNEL - -#endif - #endif diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index ce7fe640f18..67c1154960f 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -58,6 +58,9 @@ static struct kvm_stats_debugfs_item { { "io_exits", &kvm_stat.io_exits }, { "mmio_exits", &kvm_stat.mmio_exits }, { "signal_exits", &kvm_stat.signal_exits }, + { "irq_window", &kvm_stat.irq_window_exits }, + { "halt_exits", &kvm_stat.halt_exits }, + { "request_irq", &kvm_stat.request_irq_exits }, { "irq_exits", &kvm_stat.irq_exits }, { 0, 0 } }; @@ -227,6 +230,7 @@ static int kvm_dev_open(struct inode *inode, struct file *filp) struct kvm_vcpu *vcpu = &kvm->vcpus[i]; mutex_init(&vcpu->mutex); + vcpu->kvm = kvm; vcpu->mmu.root_hpa = INVALID_PAGE; INIT_LIST_HEAD(&vcpu->free_pages); } @@ -268,8 +272,8 @@ static void kvm_free_physmem(struct kvm *kvm) static void kvm_free_vcpu(struct kvm_vcpu *vcpu) { - kvm_arch_ops->vcpu_free(vcpu); kvm_mmu_destroy(vcpu); + kvm_arch_ops->vcpu_free(vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -295,14 +299,17 @@ static void inject_gp(struct kvm_vcpu *vcpu) kvm_arch_ops->inject_gp(vcpu, 0); } -static int pdptrs_have_reserved_bits_set(struct kvm_vcpu *vcpu, - unsigned long cr3) +/* + * Load the pae pdptrs. Return true is they are all valid. + */ +static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) { gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; - unsigned offset = (cr3 & (PAGE_SIZE-1)) >> 5; + unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; int i; u64 pdpte; u64 *pdpt; + int ret; struct kvm_memory_slot *memslot; spin_lock(&vcpu->kvm->lock); @@ -310,16 +317,23 @@ static int pdptrs_have_reserved_bits_set(struct kvm_vcpu *vcpu, /* FIXME: !memslot - emulate? 0xff? */ pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0); + ret = 1; for (i = 0; i < 4; ++i) { pdpte = pdpt[offset + i]; - if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) - break; + if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) { + ret = 0; + goto out; + } } + for (i = 0; i < 4; ++i) + vcpu->pdptrs[i] = pdpt[offset + i]; + +out: kunmap_atomic(pdpt, KM_USER0); spin_unlock(&vcpu->kvm->lock); - return i != 4; + return ret; } void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) @@ -365,8 +379,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } } else #endif - if (is_pae(vcpu) && - pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) { + if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) { printk(KERN_DEBUG "set_cr0: #GP, pdptrs " "reserved bits\n"); inject_gp(vcpu); @@ -387,6 +400,7 @@ EXPORT_SYMBOL_GPL(set_cr0); void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) { + kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f)); } EXPORT_SYMBOL_GPL(lmsw); @@ -407,7 +421,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return; } } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK) - && pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) { + && !load_pdptrs(vcpu, vcpu->cr3)) { printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); inject_gp(vcpu); } @@ -439,7 +453,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) return; } if (is_paging(vcpu) && is_pae(vcpu) && - pdptrs_have_reserved_bits_set(vcpu, cr3)) { + !load_pdptrs(vcpu, cr3)) { printk(KERN_DEBUG "set_cr3: #GP, pdptrs " "reserved bits\n"); inject_gp(vcpu); @@ -449,7 +463,19 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) vcpu->cr3 = cr3; spin_lock(&vcpu->kvm->lock); - vcpu->mmu.new_cr3(vcpu); + /* + * Does the new cr3 value map to physical memory? (Note, we + * catch an invalid cr3 even in real-mode, because it would + * cause trouble later on when we turn on paging anyway.) + * + * A real CPU would silently accept an invalid cr3 and would + * attempt to use it - with largely undefined (and often hard + * to debug) behavior on the guest side. + */ + if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) + inject_gp(vcpu); + else + vcpu->mmu.new_cr3(vcpu); spin_unlock(&vcpu->kvm->lock); } EXPORT_SYMBOL_GPL(set_cr3); @@ -517,7 +543,6 @@ static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n) vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; vcpu->cpu = -1; /* First load will set up TR */ - vcpu->kvm = kvm; r = kvm_arch_ops->vcpu_create(vcpu); if (r < 0) goto out_free_vcpus; @@ -634,6 +659,7 @@ raced: | __GFP_ZERO); if (!new.phys_mem[i]) goto out_free; + new.phys_mem[i]->private = 0; } } @@ -688,6 +714,13 @@ out: return r; } +static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot) +{ + spin_lock(&vcpu->kvm->lock); + kvm_mmu_slot_remove_write_access(vcpu, slot); + spin_unlock(&vcpu->kvm->lock); +} + /* * Get (and clear) the dirty memory log for a memory slot. */ @@ -697,6 +730,7 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int r, i; int n; + int cleared; unsigned long any = 0; spin_lock(&kvm->lock); @@ -727,15 +761,17 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm, if (any) { - spin_lock(&kvm->lock); - kvm_mmu_slot_remove_write_access(kvm, log->slot); - spin_unlock(&kvm->lock); - memset(memslot->dirty_bitmap, 0, n); + cleared = 0; for (i = 0; i < KVM_MAX_VCPUS; ++i) { struct kvm_vcpu *vcpu = vcpu_load(kvm, i); if (!vcpu) continue; + if (!cleared) { + do_remove_write_access(vcpu, log->slot); + memset(memslot->dirty_bitmap, 0, n); + cleared = 1; + } kvm_arch_ops->tlb_flush(vcpu); vcpu_put(vcpu); } @@ -863,6 +899,27 @@ static int emulator_read_emulated(unsigned long addr, } } +static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, + unsigned long val, int bytes) +{ + struct kvm_memory_slot *m; + struct page *page; + void *virt; + + if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) + return 0; + m = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT); + if (!m) + return 0; + page = gfn_to_page(m, gpa >> PAGE_SHIFT); + kvm_mmu_pre_write(vcpu, gpa, bytes); + virt = kmap_atomic(page, KM_USER0); + memcpy(virt + offset_in_page(gpa), &val, bytes); + kunmap_atomic(virt, KM_USER0); + kvm_mmu_post_write(vcpu, gpa, bytes); + return 1; +} + static int emulator_write_emulated(unsigned long addr, unsigned long val, unsigned int bytes, @@ -874,6 +931,9 @@ static int emulator_write_emulated(unsigned long addr, if (gpa == UNMAPPED_GVA) return X86EMUL_PROPAGATE_FAULT; + if (emulator_write_phys(vcpu, gpa, val, bytes)) + return X86EMUL_CONTINUE; + vcpu->mmio_needed = 1; vcpu->mmio_phys_addr = gpa; vcpu->mmio_size = bytes; @@ -898,6 +958,30 @@ static int emulator_cmpxchg_emulated(unsigned long addr, return emulator_write_emulated(addr, new, bytes, ctxt); } +#ifdef CONFIG_X86_32 + +static int emulator_cmpxchg8b_emulated(unsigned long addr, + unsigned long old_lo, + unsigned long old_hi, + unsigned long new_lo, + unsigned long new_hi, + struct x86_emulate_ctxt *ctxt) +{ + static int reported; + int r; + + if (!reported) { + reported = 1; + printk(KERN_WARNING "kvm: emulating exchange8b as write\n"); + } + r = emulator_write_emulated(addr, new_lo, 4, ctxt); + if (r != X86EMUL_CONTINUE) + return r; + return emulator_write_emulated(addr+4, new_hi, 4, ctxt); +} + +#endif + static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) { return kvm_arch_ops->get_segment_base(vcpu, seg); @@ -905,18 +989,15 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) { - spin_lock(&vcpu->kvm->lock); - vcpu->mmu.inval_page(vcpu, address); - spin_unlock(&vcpu->kvm->lock); - kvm_arch_ops->invlpg(vcpu, address); return X86EMUL_CONTINUE; } int emulate_clts(struct kvm_vcpu *vcpu) { - unsigned long cr0 = vcpu->cr0; + unsigned long cr0; - cr0 &= ~CR0_TS_MASK; + kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); + cr0 = vcpu->cr0 & ~CR0_TS_MASK; kvm_arch_ops->set_cr0(vcpu, cr0); return X86EMUL_CONTINUE; } @@ -975,6 +1056,9 @@ struct x86_emulate_ops emulate_ops = { .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, +#ifdef CONFIG_X86_32 + .cmpxchg8b_emulated = emulator_cmpxchg8b_emulated, +#endif }; int emulate_instruction(struct kvm_vcpu *vcpu, @@ -1024,6 +1108,8 @@ int emulate_instruction(struct kvm_vcpu *vcpu, } if (r) { + if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) + return EMULATE_DONE; if (!vcpu->mmio_needed) { report_emulation_failure(&emulate_ctxt); return EMULATE_FAIL; @@ -1069,6 +1155,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) { + kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); switch (cr) { case 0: return vcpu->cr0; @@ -1403,6 +1490,7 @@ static int kvm_dev_ioctl_get_sregs(struct kvm *kvm, struct kvm_sregs *sregs) sregs->gdt.limit = dt.limit; sregs->gdt.base = dt.base; + kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); sregs->cr0 = vcpu->cr0; sregs->cr2 = vcpu->cr2; sregs->cr3 = vcpu->cr3; @@ -1467,11 +1555,15 @@ static int kvm_dev_ioctl_set_sregs(struct kvm *kvm, struct kvm_sregs *sregs) #endif vcpu->apic_base = sregs->apic_base; + kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); + mmu_reset_needed |= vcpu->cr0 != sregs->cr0; kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0); mmu_reset_needed |= vcpu->cr4 != sregs->cr4; kvm_arch_ops->set_cr4(vcpu, sregs->cr4); + if (!is_long_mode(vcpu) && is_pae(vcpu)) + load_pdptrs(vcpu, vcpu->cr3); if (mmu_reset_needed) kvm_mmu_reset_context(vcpu); @@ -1693,12 +1785,12 @@ static long kvm_dev_ioctl(struct file *filp, if (copy_from_user(&kvm_run, (void *)arg, sizeof kvm_run)) goto out; r = kvm_dev_ioctl_run(kvm, &kvm_run); - if (r < 0) + if (r < 0 && r != -EINTR) goto out; - r = -EFAULT; - if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run)) + if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run)) { + r = -EFAULT; goto out; - r = 0; + } break; } case KVM_GET_REGS: { @@ -1842,6 +1934,7 @@ static long kvm_dev_ioctl(struct file *filp, num_msrs_to_save * sizeof(u32))) goto out; r = 0; + break; } default: ; @@ -1944,17 +2037,17 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) return -EEXIST; } - kvm_arch_ops = ops; - - if (!kvm_arch_ops->cpu_has_kvm_support()) { + if (!ops->cpu_has_kvm_support()) { printk(KERN_ERR "kvm: no hardware support\n"); return -EOPNOTSUPP; } - if (kvm_arch_ops->disabled_by_bios()) { + if (ops->disabled_by_bios()) { printk(KERN_ERR "kvm: disabled by bios\n"); return -EOPNOTSUPP; } + kvm_arch_ops = ops; + r = kvm_arch_ops->hardware_setup(); if (r < 0) return r; diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 790423c5f23..c6f972914f0 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -26,7 +26,31 @@ #include "vmx.h" #include "kvm.h" +#undef MMU_DEBUG + +#undef AUDIT + +#ifdef AUDIT +static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg); +#else +static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {} +#endif + +#ifdef MMU_DEBUG + +#define pgprintk(x...) do { if (dbg) printk(x); } while (0) +#define rmap_printk(x...) do { if (dbg) printk(x); } while (0) + +#else + #define pgprintk(x...) do { } while (0) +#define rmap_printk(x...) do { } while (0) + +#endif + +#if defined(MMU_DEBUG) || defined(AUDIT) +static int dbg = 1; +#endif #define ASSERT(x) \ if (!(x)) { \ @@ -34,8 +58,10 @@ __FILE__, __LINE__, #x); \ } -#define PT64_ENT_PER_PAGE 512 -#define PT32_ENT_PER_PAGE 1024 +#define PT64_PT_BITS 9 +#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) +#define PT32_PT_BITS 10 +#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS) #define PT_WRITABLE_SHIFT 1 @@ -125,6 +151,13 @@ #define PT_DIRECTORY_LEVEL 2 #define PT_PAGE_TABLE_LEVEL 1 +#define RMAP_EXT 4 + +struct kvm_rmap_desc { + u64 *shadow_ptes[RMAP_EXT]; + struct kvm_rmap_desc *more; +}; + static int is_write_protection(struct kvm_vcpu *vcpu) { return vcpu->cr0 & CR0_WP_MASK; @@ -150,32 +183,272 @@ static int is_io_pte(unsigned long pte) return pte & PT_SHADOW_IO_MARK; } +static int is_rmap_pte(u64 pte) +{ + return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK)) + == (PT_WRITABLE_MASK | PT_PRESENT_MASK); +} + +static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, + size_t objsize, int min) +{ + void *obj; + + if (cache->nobjs >= min) + return 0; + while (cache->nobjs < ARRAY_SIZE(cache->objects)) { + obj = kzalloc(objsize, GFP_NOWAIT); + if (!obj) + return -ENOMEM; + cache->objects[cache->nobjs++] = obj; + } + return 0; +} + +static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) +{ + while (mc->nobjs) + kfree(mc->objects[--mc->nobjs]); +} + +static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) +{ + int r; + + r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, + sizeof(struct kvm_pte_chain), 4); + if (r) + goto out; + r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, + sizeof(struct kvm_rmap_desc), 1); +out: + return r; +} + +static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) +{ + mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); + mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache); +} + +static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, + size_t size) +{ + void *p; + + BUG_ON(!mc->nobjs); + p = mc->objects[--mc->nobjs]; + memset(p, 0, size); + return p; +} + +static void mmu_memory_cache_free(struct kvm_mmu_memory_cache *mc, void *obj) +{ + if (mc->nobjs < KVM_NR_MEM_OBJS) + mc->objects[mc->nobjs++] = obj; + else + kfree(obj); +} + +static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu) +{ + return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache, + sizeof(struct kvm_pte_chain)); +} + +static void mmu_free_pte_chain(struct kvm_vcpu *vcpu, + struct kvm_pte_chain *pc) +{ + mmu_memory_cache_free(&vcpu->mmu_pte_chain_cache, pc); +} + +static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) +{ + return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache, + sizeof(struct kvm_rmap_desc)); +} + +static void mmu_free_rmap_desc(struct kvm_vcpu *vcpu, + struct kvm_rmap_desc *rd) +{ + mmu_memory_cache_free(&vcpu->mmu_rmap_desc_cache, rd); +} + +/* + * Reverse mapping data structures: + * + * If page->private bit zero is zero, then page->private points to the + * shadow page table entry that points to page_address(page). + * + * If page->private bit zero is one, (then page->private & ~1) points + * to a struct kvm_rmap_desc containing more mappings. + */ +static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte) +{ + struct page *page; + struct kvm_rmap_desc *desc; + int i; + + if (!is_rmap_pte(*spte)) + return; + page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); + if (!page->private) { + rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); + page->private = (unsigned long)spte; + } else if (!(page->private & 1)) { + rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte); + desc = mmu_alloc_rmap_desc(vcpu); + desc->shadow_ptes[0] = (u64 *)page->private; + desc->shadow_ptes[1] = spte; + page->private = (unsigned long)desc | 1; + } else { + rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); + desc = (struct kvm_rmap_desc *)(page->private & ~1ul); + while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) + desc = desc->more; + if (desc->shadow_ptes[RMAP_EXT-1]) { + desc->more = mmu_alloc_rmap_desc(vcpu); + desc = desc->more; + } + for (i = 0; desc->shadow_ptes[i]; ++i) + ; + desc->shadow_ptes[i] = spte; + } +} + +static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu, + struct page *page, + struct kvm_rmap_desc *desc, + int i, + struct kvm_rmap_desc *prev_desc) +{ + int j; + + for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j) + ; + desc->shadow_ptes[i] = desc->shadow_ptes[j]; + desc->shadow_ptes[j] = 0; + if (j != 0) + return; + if (!prev_desc && !desc->more) + page->private = (unsigned long)desc->shadow_ptes[0]; + else + if (prev_desc) + prev_desc->more = desc->more; + else + page->private = (unsigned long)desc->more | 1; + mmu_free_rmap_desc(vcpu, desc); +} + +static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte) +{ + struct page *page; + struct kvm_rmap_desc *desc; + struct kvm_rmap_desc *prev_desc; + int i; + + if (!is_rmap_pte(*spte)) + return; + page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); + if (!page->private) { + printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); + BUG(); + } else if (!(page->private & 1)) { + rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte); + if ((u64 *)page->private != spte) { + printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n", + spte, *spte); + BUG(); + } + page->private = 0; + } else { + rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte); + desc = (struct kvm_rmap_desc *)(page->private & ~1ul); + prev_desc = NULL; + while (desc) { + for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) + if (desc->shadow_ptes[i] == spte) { + rmap_desc_remove_entry(vcpu, page, + desc, i, + prev_desc); + return; + } + prev_desc = desc; + desc = desc->more; + } + BUG(); + } +} + +static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) +{ + struct kvm *kvm = vcpu->kvm; + struct page *page; + struct kvm_memory_slot *slot; + struct kvm_rmap_desc *desc; + u64 *spte; + + slot = gfn_to_memslot(kvm, gfn); + BUG_ON(!slot); + page = gfn_to_page(slot, gfn); + + while (page->private) { + if (!(page->private & 1)) + spte = (u64 *)page->private; + else { + desc = (struct kvm_rmap_desc *)(page->private & ~1ul); + spte = desc->shadow_ptes[0]; + } + BUG_ON(!spte); + BUG_ON((*spte & PT64_BASE_ADDR_MASK) != + page_to_pfn(page) << PAGE_SHIFT); + BUG_ON(!(*spte & PT_PRESENT_MASK)); + BUG_ON(!(*spte & PT_WRITABLE_MASK)); + rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); + rmap_remove(vcpu, spte); + kvm_arch_ops->tlb_flush(vcpu); + *spte &= ~(u64)PT_WRITABLE_MASK; + } +} + +static int is_empty_shadow_page(hpa_t page_hpa) +{ + u64 *pos; + u64 *end; + + for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64); + pos != end; pos++) + if (*pos != 0) { + printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__, + pos, *pos); + return 0; + } + return 1; +} + static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) { struct kvm_mmu_page *page_head = page_header(page_hpa); + ASSERT(is_empty_shadow_page(page_hpa)); list_del(&page_head->link); page_head->page_hpa = page_hpa; list_add(&page_head->link, &vcpu->free_pages); + ++vcpu->kvm->n_free_mmu_pages; } -static int is_empty_shadow_page(hpa_t page_hpa) +static unsigned kvm_page_table_hashfn(gfn_t gfn) { - u32 *pos; - u32 *end; - for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u32); - pos != end; pos++) - if (*pos != 0) - return 0; - return 1; + return gfn; } -static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte) +static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, + u64 *parent_pte) { struct kvm_mmu_page *page; if (list_empty(&vcpu->free_pages)) - return INVALID_PAGE; + return NULL; page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); list_del(&page->link); @@ -183,8 +456,239 @@ static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte) ASSERT(is_empty_shadow_page(page->page_hpa)); page->slot_bitmap = 0; page->global = 1; + page->multimapped = 0; page->parent_pte = parent_pte; - return page->page_hpa; + --vcpu->kvm->n_free_mmu_pages; + return page; +} + +static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page, u64 *parent_pte) +{ + struct kvm_pte_chain *pte_chain; + struct hlist_node *node; + int i; + + if (!parent_pte) + return; + if (!page->multimapped) { + u64 *old = page->parent_pte; + + if (!old) { + page->parent_pte = parent_pte; + return; + } + page->multimapped = 1; + pte_chain = mmu_alloc_pte_chain(vcpu); + INIT_HLIST_HEAD(&page->parent_ptes); + hlist_add_head(&pte_chain->link, &page->parent_ptes); + pte_chain->parent_ptes[0] = old; + } + hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) { + if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1]) + continue; + for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) + if (!pte_chain->parent_ptes[i]) { + pte_chain->parent_ptes[i] = parent_pte; + return; + } + } + pte_chain = mmu_alloc_pte_chain(vcpu); + BUG_ON(!pte_chain); + hlist_add_head(&pte_chain->link, &page->parent_ptes); + pte_chain->parent_ptes[0] = parent_pte; +} + +static void mmu_page_remove_parent_pte(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page, + u64 *parent_pte) +{ + struct kvm_pte_chain *pte_chain; + struct hlist_node *node; + int i; + + if (!page->multimapped) { + BUG_ON(page->parent_pte != parent_pte); + page->parent_pte = NULL; + return; + } + hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) + for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { + if (!pte_chain->parent_ptes[i]) + break; + if (pte_chain->parent_ptes[i] != parent_pte) + continue; + while (i + 1 < NR_PTE_CHAIN_ENTRIES + && pte_chain->parent_ptes[i + 1]) { + pte_chain->parent_ptes[i] + = pte_chain->parent_ptes[i + 1]; + ++i; + } + pte_chain->parent_ptes[i] = NULL; + if (i == 0) { + hlist_del(&pte_chain->link); + mmu_free_pte_chain(vcpu, pte_chain); + if (hlist_empty(&page->parent_ptes)) { + page->multimapped = 0; + page->parent_pte = NULL; + } + } + return; + } + BUG(); +} + +static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu, + gfn_t gfn) +{ + unsigned index; + struct hlist_head *bucket; + struct kvm_mmu_page *page; + struct hlist_node *node; + + pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); + index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; + bucket = &vcpu->kvm->mmu_page_hash[index]; + hlist_for_each_entry(page, node, bucket, hash_link) + if (page->gfn == gfn && !page->role.metaphysical) { + pgprintk("%s: found role %x\n", + __FUNCTION__, page->role.word); + return page; + } + return NULL; +} + +static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, + gfn_t gfn, + gva_t gaddr, + unsigned level, + int metaphysical, + u64 *parent_pte) +{ + union kvm_mmu_page_role role; + unsigned index; + unsigned quadrant; + struct hlist_head *bucket; + struct kvm_mmu_page *page; + struct hlist_node *node; + + role.word = 0; + role.glevels = vcpu->mmu.root_level; + role.level = level; + role.metaphysical = metaphysical; + if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) { + quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); + quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; + role.quadrant = quadrant; + } + pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__, + gfn, role.word); + index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; + bucket = &vcpu->kvm->mmu_page_hash[index]; + hlist_for_each_entry(page, node, bucket, hash_link) + if (page->gfn == gfn && page->role.word == role.word) { + mmu_page_add_parent_pte(vcpu, page, parent_pte); + pgprintk("%s: found\n", __FUNCTION__); + return page; + } + page = kvm_mmu_alloc_page(vcpu, parent_pte); + if (!page) + return page; + pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word); + page->gfn = gfn; + page->role = role; + hlist_add_head(&page->hash_link, bucket); + if (!metaphysical) + rmap_write_protect(vcpu, gfn); + return page; +} + +static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page) +{ + unsigned i; + u64 *pt; + u64 ent; + + pt = __va(page->page_hpa); + + if (page->role.level == PT_PAGE_TABLE_LEVEL) { + for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { + if (pt[i] & PT_PRESENT_MASK) + rmap_remove(vcpu, &pt[i]); + pt[i] = 0; + } + kvm_arch_ops->tlb_flush(vcpu); + return; + } + + for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { + ent = pt[i]; + + pt[i] = 0; + if (!(ent & PT_PRESENT_MASK)) + continue; + ent &= PT64_BASE_ADDR_MASK; + mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]); + } +} + +static void kvm_mmu_put_page(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page, + u64 *parent_pte) +{ + mmu_page_remove_parent_pte(vcpu, page, parent_pte); +} + +static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page) +{ + u64 *parent_pte; + + while (page->multimapped || page->parent_pte) { + if (!page->multimapped) + parent_pte = page->parent_pte; + else { + struct kvm_pte_chain *chain; + + chain = container_of(page->parent_ptes.first, + struct kvm_pte_chain, link); + parent_pte = chain->parent_ptes[0]; + } + BUG_ON(!parent_pte); + kvm_mmu_put_page(vcpu, page, parent_pte); + *parent_pte = 0; + } + kvm_mmu_page_unlink_children(vcpu, page); + if (!page->root_count) { + hlist_del(&page->hash_link); + kvm_mmu_free_page(vcpu, page->page_hpa); + } else { + list_del(&page->link); + list_add(&page->link, &vcpu->kvm->active_mmu_pages); + } +} + +static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + unsigned index; + struct hlist_head *bucket; + struct kvm_mmu_page *page; + struct hlist_node *node, *n; + int r; + + pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); + r = 0; + index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; + bucket = &vcpu->kvm->mmu_page_hash[index]; + hlist_for_each_entry_safe(page, node, n, bucket, hash_link) + if (page->gfn == gfn && !page->role.metaphysical) { + pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn, + page->role.word); + kvm_mmu_zap_page(vcpu, page); + r = 1; + } + return r; } static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) @@ -225,35 +729,6 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva) return gpa_to_hpa(vcpu, gpa); } - -static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa, - int level) -{ - ASSERT(vcpu); - ASSERT(VALID_PAGE(page_hpa)); - ASSERT(level <= PT64_ROOT_LEVEL && level > 0); - - if (level == 1) - memset(__va(page_hpa), 0, PAGE_SIZE); - else { - u64 *pos; - u64 *end; - - for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE; - pos != end; pos++) { - u64 current_ent = *pos; - - *pos = 0; - if (is_present_pte(current_ent)) - release_pt_page_64(vcpu, - current_ent & - PT64_BASE_ADDR_MASK, - level - 1); - } - } - kvm_mmu_free_page(vcpu, page_hpa); -} - static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) { } @@ -266,52 +741,109 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) for (; ; level--) { u32 index = PT64_INDEX(v, level); u64 *table; + u64 pte; ASSERT(VALID_PAGE(table_addr)); table = __va(table_addr); if (level == 1) { + pte = table[index]; + if (is_present_pte(pte) && is_writeble_pte(pte)) + return 0; mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT); page_header_update_slot(vcpu->kvm, table, v); table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; + rmap_add(vcpu, &table[index]); return 0; } if (table[index] == 0) { - hpa_t new_table = kvm_mmu_alloc_page(vcpu, - &table[index]); - - if (!VALID_PAGE(new_table)) { + struct kvm_mmu_page *new_table; + gfn_t pseudo_gfn; + + pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK) + >> PAGE_SHIFT; + new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, + v, level - 1, + 1, &table[index]); + if (!new_table) { pgprintk("nonpaging_map: ENOMEM\n"); return -ENOMEM; } - if (level == PT32E_ROOT_LEVEL) - table[index] = new_table | PT_PRESENT_MASK; - else - table[index] = new_table | PT_PRESENT_MASK | - PT_WRITABLE_MASK | PT_USER_MASK; + table[index] = new_table->page_hpa | PT_PRESENT_MASK + | PT_WRITABLE_MASK | PT_USER_MASK; } table_addr = table[index] & PT64_BASE_ADDR_MASK; } } -static void nonpaging_flush(struct kvm_vcpu *vcpu) +static void mmu_free_roots(struct kvm_vcpu *vcpu) { - hpa_t root = vcpu->mmu.root_hpa; + int i; + struct kvm_mmu_page *page; - ++kvm_stat.tlb_flush; - pgprintk("nonpaging_flush\n"); - ASSERT(VALID_PAGE(root)); - release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level); - root = kvm_mmu_alloc_page(vcpu, NULL); - ASSERT(VALID_PAGE(root)); - vcpu->mmu.root_hpa = root; - if (is_paging(vcpu)) - root |= (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)); - kvm_arch_ops->set_cr3(vcpu, root); - kvm_arch_ops->tlb_flush(vcpu); +#ifdef CONFIG_X86_64 + if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) { + hpa_t root = vcpu->mmu.root_hpa; + + ASSERT(VALID_PAGE(root)); + page = page_header(root); + --page->root_count; + vcpu->mmu.root_hpa = INVALID_PAGE; + return; + } +#endif + for (i = 0; i < 4; ++i) { + hpa_t root = vcpu->mmu.pae_root[i]; + + ASSERT(VALID_PAGE(root)); + root &= PT64_BASE_ADDR_MASK; + page = page_header(root); + --page->root_count; + vcpu->mmu.pae_root[i] = INVALID_PAGE; + } + vcpu->mmu.root_hpa = INVALID_PAGE; +} + +static void mmu_alloc_roots(struct kvm_vcpu *vcpu) +{ + int i; + gfn_t root_gfn; + struct kvm_mmu_page *page; + + root_gfn = vcpu->cr3 >> PAGE_SHIFT; + +#ifdef CONFIG_X86_64 + if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) { + hpa_t root = vcpu->mmu.root_hpa; + + ASSERT(!VALID_PAGE(root)); + page = kvm_mmu_get_page(vcpu, root_gfn, 0, + PT64_ROOT_LEVEL, 0, NULL); + root = page->page_hpa; + ++page->root_count; + vcpu->mmu.root_hpa = root; + return; + } +#endif + for (i = 0; i < 4; ++i) { + hpa_t root = vcpu->mmu.pae_root[i]; + + ASSERT(!VALID_PAGE(root)); + if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) + root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT; + else if (vcpu->mmu.root_level == 0) + root_gfn = 0; + page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, + PT32_ROOT_LEVEL, !is_paging(vcpu), + NULL); + root = page->page_hpa; + ++page->root_count; + vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; + } + vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root); } static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) @@ -322,43 +854,29 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code) { - int ret; gpa_t addr = gva; + hpa_t paddr; + int r; + + r = mmu_topup_memory_caches(vcpu); + if (r) + return r; ASSERT(vcpu); ASSERT(VALID_PAGE(vcpu->mmu.root_hpa)); - for (;;) { - hpa_t paddr; - - paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK); - if (is_error_hpa(paddr)) - return 1; + paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK); - ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr); - if (ret) { - nonpaging_flush(vcpu); - continue; - } - break; - } - return ret; -} + if (is_error_hpa(paddr)) + return 1; -static void nonpaging_inval_page(struct kvm_vcpu *vcpu, gva_t addr) -{ + return nonpaging_map(vcpu, addr & PAGE_MASK, paddr); } static void nonpaging_free(struct kvm_vcpu *vcpu) { - hpa_t root; - - ASSERT(vcpu); - root = vcpu->mmu.root_hpa; - if (VALID_PAGE(root)) - release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level); - vcpu->mmu.root_hpa = INVALID_PAGE; + mmu_free_roots(vcpu); } static int nonpaging_init_context(struct kvm_vcpu *vcpu) @@ -367,40 +885,31 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) context->new_cr3 = nonpaging_new_cr3; context->page_fault = nonpaging_page_fault; - context->inval_page = nonpaging_inval_page; context->gva_to_gpa = nonpaging_gva_to_gpa; context->free = nonpaging_free; - context->root_level = PT32E_ROOT_LEVEL; + context->root_level = 0; context->shadow_root_level = PT32E_ROOT_LEVEL; - context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL); + mmu_alloc_roots(vcpu); ASSERT(VALID_PAGE(context->root_hpa)); kvm_arch_ops->set_cr3(vcpu, context->root_hpa); return 0; } - static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) { - struct kvm_mmu_page *page, *npage; - - list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages, - link) { - if (page->global) - continue; - - if (!page->parent_pte) - continue; - - *page->parent_pte = 0; - release_pt_page_64(vcpu, page->page_hpa, 1); - } ++kvm_stat.tlb_flush; kvm_arch_ops->tlb_flush(vcpu); } static void paging_new_cr3(struct kvm_vcpu *vcpu) { + pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3); + mmu_free_roots(vcpu); + if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) + kvm_mmu_free_some_pages(vcpu); + mmu_alloc_roots(vcpu); kvm_mmu_flush_tlb(vcpu); + kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); } static void mark_pagetable_nonglobal(void *shadow_pte) @@ -412,7 +921,8 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu, u64 *shadow_pte, gpa_t gaddr, int dirty, - u64 access_bits) + u64 access_bits, + gfn_t gfn) { hpa_t paddr; @@ -420,13 +930,10 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu, if (!dirty) access_bits &= ~PT_WRITABLE_MASK; - if (access_bits & PT_WRITABLE_MASK) - mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); + paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); *shadow_pte |= access_bits; - paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); - if (!(*shadow_pte & PT_GLOBAL_MASK)) mark_pagetable_nonglobal(shadow_pte); @@ -434,10 +941,31 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu, *shadow_pte |= gaddr; *shadow_pte |= PT_SHADOW_IO_MARK; *shadow_pte &= ~PT_PRESENT_MASK; - } else { - *shadow_pte |= paddr; - page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); + return; + } + + *shadow_pte |= paddr; + + if (access_bits & PT_WRITABLE_MASK) { + struct kvm_mmu_page *shadow; + + shadow = kvm_mmu_lookup_page(vcpu, gfn); + if (shadow) { + pgprintk("%s: found shadow page for %lx, marking ro\n", + __FUNCTION__, gfn); + access_bits &= ~PT_WRITABLE_MASK; + if (is_writeble_pte(*shadow_pte)) { + *shadow_pte &= ~PT_WRITABLE_MASK; + kvm_arch_ops->tlb_flush(vcpu); + } + } } + + if (access_bits & PT_WRITABLE_MASK) + mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); + + page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); + rmap_add(vcpu, shadow_pte); } static void inject_page_fault(struct kvm_vcpu *vcpu, @@ -474,41 +1002,6 @@ static int may_access(u64 pte, int write, int user) return 1; } -/* - * Remove a shadow pte. - */ -static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr) -{ - hpa_t page_addr = vcpu->mmu.root_hpa; - int level = vcpu->mmu.shadow_root_level; - - ++kvm_stat.invlpg; - - for (; ; level--) { - u32 index = PT64_INDEX(addr, level); - u64 *table = __va(page_addr); - - if (level == PT_PAGE_TABLE_LEVEL ) { - table[index] = 0; - return; - } - - if (!is_present_pte(table[index])) - return; - - page_addr = table[index] & PT64_BASE_ADDR_MASK; - - if (level == PT_DIRECTORY_LEVEL && - (table[index] & PT_SHADOW_PS_MARK)) { - table[index] = 0; - release_pt_page_64(vcpu, page_addr, PT_PAGE_TABLE_LEVEL); - - kvm_arch_ops->tlb_flush(vcpu); - return; - } - } -} - static void paging_free(struct kvm_vcpu *vcpu) { nonpaging_free(vcpu); @@ -522,37 +1015,40 @@ static void paging_free(struct kvm_vcpu *vcpu) #include "paging_tmpl.h" #undef PTTYPE -static int paging64_init_context(struct kvm_vcpu *vcpu) +static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) { struct kvm_mmu *context = &vcpu->mmu; ASSERT(is_pae(vcpu)); context->new_cr3 = paging_new_cr3; context->page_fault = paging64_page_fault; - context->inval_page = paging_inval_page; context->gva_to_gpa = paging64_gva_to_gpa; context->free = paging_free; - context->root_level = PT64_ROOT_LEVEL; - context->shadow_root_level = PT64_ROOT_LEVEL; - context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL); + context->root_level = level; + context->shadow_root_level = level; + mmu_alloc_roots(vcpu); ASSERT(VALID_PAGE(context->root_hpa)); kvm_arch_ops->set_cr3(vcpu, context->root_hpa | (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); return 0; } +static int paging64_init_context(struct kvm_vcpu *vcpu) +{ + return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); +} + static int paging32_init_context(struct kvm_vcpu *vcpu) { struct kvm_mmu *context = &vcpu->mmu; context->new_cr3 = paging_new_cr3; context->page_fault = paging32_page_fault; - context->inval_page = paging_inval_page; context->gva_to_gpa = paging32_gva_to_gpa; context->free = paging_free; context->root_level = PT32_ROOT_LEVEL; context->shadow_root_level = PT32E_ROOT_LEVEL; - context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL); + mmu_alloc_roots(vcpu); ASSERT(VALID_PAGE(context->root_hpa)); kvm_arch_ops->set_cr3(vcpu, context->root_hpa | (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); @@ -561,14 +1057,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) static int paging32E_init_context(struct kvm_vcpu *vcpu) { - int ret; - - if ((ret = paging64_init_context(vcpu))) - return ret; - - vcpu->mmu.root_level = PT32E_ROOT_LEVEL; - vcpu->mmu.shadow_root_level = PT32E_ROOT_LEVEL; - return 0; + return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); } static int init_kvm_mmu(struct kvm_vcpu *vcpu) @@ -597,41 +1086,161 @@ static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) { + int r; + destroy_kvm_mmu(vcpu); - return init_kvm_mmu(vcpu); + r = init_kvm_mmu(vcpu); + if (r < 0) + goto out; + r = mmu_topup_memory_caches(vcpu); +out: + return r; } -static void free_mmu_pages(struct kvm_vcpu *vcpu) +void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) { - while (!list_empty(&vcpu->free_pages)) { + gfn_t gfn = gpa >> PAGE_SHIFT; + struct kvm_mmu_page *page; + struct kvm_mmu_page *child; + struct hlist_node *node, *n; + struct hlist_head *bucket; + unsigned index; + u64 *spte; + u64 pte; + unsigned offset = offset_in_page(gpa); + unsigned pte_size; + unsigned page_offset; + unsigned misaligned; + int level; + int flooded = 0; + + pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); + if (gfn == vcpu->last_pt_write_gfn) { + ++vcpu->last_pt_write_count; + if (vcpu->last_pt_write_count >= 3) + flooded = 1; + } else { + vcpu->last_pt_write_gfn = gfn; + vcpu->last_pt_write_count = 1; + } + index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; + bucket = &vcpu->kvm->mmu_page_hash[index]; + hlist_for_each_entry_safe(page, node, n, bucket, hash_link) { + if (page->gfn != gfn || page->role.metaphysical) + continue; + pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; + misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); + if (misaligned || flooded) { + /* + * Misaligned accesses are too much trouble to fix + * up; also, they usually indicate a page is not used + * as a page table. + * + * If we're seeing too many writes to a page, + * it may no longer be a page table, or we may be + * forking, in which case it is better to unmap the + * page. + */ + pgprintk("misaligned: gpa %llx bytes %d role %x\n", + gpa, bytes, page->role.word); + kvm_mmu_zap_page(vcpu, page); + continue; + } + page_offset = offset; + level = page->role.level; + if (page->role.glevels == PT32_ROOT_LEVEL) { + page_offset <<= 1; /* 32->64 */ + page_offset &= ~PAGE_MASK; + } + spte = __va(page->page_hpa); + spte += page_offset / sizeof(*spte); + pte = *spte; + if (is_present_pte(pte)) { + if (level == PT_PAGE_TABLE_LEVEL) + rmap_remove(vcpu, spte); + else { + child = page_header(pte & PT64_BASE_ADDR_MASK); + mmu_page_remove_parent_pte(vcpu, child, spte); + } + } + *spte = 0; + } +} + +void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) +{ +} + +int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) +{ + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); + + return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT); +} + +void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) +{ + while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) { struct kvm_mmu_page *page; + page = container_of(vcpu->kvm->active_mmu_pages.prev, + struct kvm_mmu_page, link); + kvm_mmu_zap_page(vcpu, page); + } +} +EXPORT_SYMBOL_GPL(kvm_mmu_free_some_pages); + +static void free_mmu_pages(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu_page *page; + + while (!list_empty(&vcpu->kvm->active_mmu_pages)) { + page = container_of(vcpu->kvm->active_mmu_pages.next, + struct kvm_mmu_page, link); + kvm_mmu_zap_page(vcpu, page); + } + while (!list_empty(&vcpu->free_pages)) { page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); list_del(&page->link); __free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT)); page->page_hpa = INVALID_PAGE; } + free_page((unsigned long)vcpu->mmu.pae_root); } static int alloc_mmu_pages(struct kvm_vcpu *vcpu) { + struct page *page; int i; ASSERT(vcpu); for (i = 0; i < KVM_NUM_MMU_PAGES; i++) { - struct page *page; struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i]; INIT_LIST_HEAD(&page_header->link); - if ((page = alloc_page(GFP_KVM_MMU)) == NULL) + if ((page = alloc_page(GFP_KERNEL)) == NULL) goto error_1; page->private = (unsigned long)page_header; page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT; memset(__va(page_header->page_hpa), 0, PAGE_SIZE); list_add(&page_header->link, &vcpu->free_pages); + ++vcpu->kvm->n_free_mmu_pages; } + + /* + * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. + * Therefore we need to allocate shadow page tables in the first + * 4GB of memory, which happens to fit the DMA32 zone. + */ + page = alloc_page(GFP_KERNEL | __GFP_DMA32); + if (!page) + goto error_1; + vcpu->mmu.pae_root = page_address(page); + for (i = 0; i < 4; ++i) + vcpu->mmu.pae_root[i] = INVALID_PAGE; + return 0; error_1: @@ -663,10 +1272,12 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) destroy_kvm_mmu(vcpu); free_mmu_pages(vcpu); + mmu_free_memory_caches(vcpu); } -void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) +void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot) { + struct kvm *kvm = vcpu->kvm; struct kvm_mmu_page *page; list_for_each_entry(page, &kvm->active_mmu_pages, link) { @@ -679,8 +1290,169 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) pt = __va(page->page_hpa); for (i = 0; i < PT64_ENT_PER_PAGE; ++i) /* avoid RMW */ - if (pt[i] & PT_WRITABLE_MASK) + if (pt[i] & PT_WRITABLE_MASK) { + rmap_remove(vcpu, &pt[i]); pt[i] &= ~PT_WRITABLE_MASK; + } + } +} + +#ifdef AUDIT + +static const char *audit_msg; + +static gva_t canonicalize(gva_t gva) +{ +#ifdef CONFIG_X86_64 + gva = (long long)(gva << 16) >> 16; +#endif + return gva; +} +static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, + gva_t va, int level) +{ + u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK); + int i; + gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1)); + + for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { + u64 ent = pt[i]; + + if (!ent & PT_PRESENT_MASK) + continue; + + va = canonicalize(va); + if (level > 1) + audit_mappings_page(vcpu, ent, va, level - 1); + else { + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va); + hpa_t hpa = gpa_to_hpa(vcpu, gpa); + + if ((ent & PT_PRESENT_MASK) + && (ent & PT64_BASE_ADDR_MASK) != hpa) + printk(KERN_ERR "audit error: (%s) levels %d" + " gva %lx gpa %llx hpa %llx ent %llx\n", + audit_msg, vcpu->mmu.root_level, + va, gpa, hpa, ent); + } } } + +static void audit_mappings(struct kvm_vcpu *vcpu) +{ + int i; + + if (vcpu->mmu.root_level == 4) + audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4); + else + for (i = 0; i < 4; ++i) + if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK) + audit_mappings_page(vcpu, + vcpu->mmu.pae_root[i], + i << 30, + 2); +} + +static int count_rmaps(struct kvm_vcpu *vcpu) +{ + int nmaps = 0; + int i, j, k; + + for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { + struct kvm_memory_slot *m = &vcpu->kvm->memslots[i]; + struct kvm_rmap_desc *d; + + for (j = 0; j < m->npages; ++j) { + struct page *page = m->phys_mem[j]; + + if (!page->private) + continue; + if (!(page->private & 1)) { + ++nmaps; + continue; + } + d = (struct kvm_rmap_desc *)(page->private & ~1ul); + while (d) { + for (k = 0; k < RMAP_EXT; ++k) + if (d->shadow_ptes[k]) + ++nmaps; + else + break; + d = d->more; + } + } + } + return nmaps; +} + +static int count_writable_mappings(struct kvm_vcpu *vcpu) +{ + int nmaps = 0; + struct kvm_mmu_page *page; + int i; + + list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) { + u64 *pt = __va(page->page_hpa); + + if (page->role.level != PT_PAGE_TABLE_LEVEL) + continue; + + for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { + u64 ent = pt[i]; + + if (!(ent & PT_PRESENT_MASK)) + continue; + if (!(ent & PT_WRITABLE_MASK)) + continue; + ++nmaps; + } + } + return nmaps; +} + +static void audit_rmap(struct kvm_vcpu *vcpu) +{ + int n_rmap = count_rmaps(vcpu); + int n_actual = count_writable_mappings(vcpu); + + if (n_rmap != n_actual) + printk(KERN_ERR "%s: (%s) rmap %d actual %d\n", + __FUNCTION__, audit_msg, n_rmap, n_actual); +} + +static void audit_write_protection(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu_page *page; + + list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) { + hfn_t hfn; + struct page *pg; + + if (page->role.metaphysical) + continue; + + hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT) + >> PAGE_SHIFT; + pg = pfn_to_page(hfn); + if (pg->private) + printk(KERN_ERR "%s: (%s) shadow page has writable" + " mappings: gfn %lx role %x\n", + __FUNCTION__, audit_msg, page->gfn, + page->role.word); + } +} + +static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) +{ + int olddbg = dbg; + + dbg = 0; + audit_msg = msg; + audit_rmap(vcpu); + audit_write_protection(vcpu); + audit_mappings(vcpu); + dbg = olddbg; +} + +#endif diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 09bb9b4ed12..2dbf4307ed9 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -32,6 +32,11 @@ #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) #define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK + #ifdef CONFIG_X86_64 + #define PT_MAX_FULL_LEVELS 4 + #else + #define PT_MAX_FULL_LEVELS 2 + #endif #elif PTTYPE == 32 #define pt_element_t u32 #define guest_walker guest_walker32 @@ -42,6 +47,7 @@ #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) #define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK + #define PT_MAX_FULL_LEVELS 2 #else #error Invalid PTTYPE value #endif @@ -52,93 +58,126 @@ */ struct guest_walker { int level; + gfn_t table_gfn[PT_MAX_FULL_LEVELS]; pt_element_t *table; + pt_element_t *ptep; pt_element_t inherited_ar; + gfn_t gfn; }; -static void FNAME(init_walker)(struct guest_walker *walker, - struct kvm_vcpu *vcpu) +/* + * Fetch a guest pte for a guest virtual address + */ +static void FNAME(walk_addr)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, gva_t addr) { hpa_t hpa; struct kvm_memory_slot *slot; + pt_element_t *ptep; + pt_element_t root; + gfn_t table_gfn; + pgprintk("%s: addr %lx\n", __FUNCTION__, addr); walker->level = vcpu->mmu.root_level; - slot = gfn_to_memslot(vcpu->kvm, - (vcpu->cr3 & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); - hpa = safe_gpa_to_hpa(vcpu, vcpu->cr3 & PT64_BASE_ADDR_MASK); + walker->table = NULL; + root = vcpu->cr3; +#if PTTYPE == 64 + if (!is_long_mode(vcpu)) { + walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; + root = *walker->ptep; + if (!(root & PT_PRESENT_MASK)) + return; + --walker->level; + } +#endif + table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; + walker->table_gfn[walker->level - 1] = table_gfn; + pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, + walker->level - 1, table_gfn); + slot = gfn_to_memslot(vcpu->kvm, table_gfn); + hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK); walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0); ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || (vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0); - walker->table = (pt_element_t *)( (unsigned long)walker->table | - (unsigned long)(vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) ); walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; + + for (;;) { + int index = PT_INDEX(addr, walker->level); + hpa_t paddr; + + ptep = &walker->table[index]; + ASSERT(((unsigned long)walker->table & PAGE_MASK) == + ((unsigned long)ptep & PAGE_MASK)); + + if (is_present_pte(*ptep) && !(*ptep & PT_ACCESSED_MASK)) + *ptep |= PT_ACCESSED_MASK; + + if (!is_present_pte(*ptep)) + break; + + if (walker->level == PT_PAGE_TABLE_LEVEL) { + walker->gfn = (*ptep & PT_BASE_ADDR_MASK) + >> PAGE_SHIFT; + break; + } + + if (walker->level == PT_DIRECTORY_LEVEL + && (*ptep & PT_PAGE_SIZE_MASK) + && (PTTYPE == 64 || is_pse(vcpu))) { + walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK) + >> PAGE_SHIFT; + walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL); + break; + } + + if (walker->level != 3 || is_long_mode(vcpu)) + walker->inherited_ar &= walker->table[index]; + table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; + paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK); + kunmap_atomic(walker->table, KM_USER0); + walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT), + KM_USER0); + --walker->level; + walker->table_gfn[walker->level - 1 ] = table_gfn; + pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, + walker->level - 1, table_gfn); + } + walker->ptep = ptep; + pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); } static void FNAME(release_walker)(struct guest_walker *walker) { - kunmap_atomic(walker->table, KM_USER0); + if (walker->table) + kunmap_atomic(walker->table, KM_USER0); } static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte, - u64 *shadow_pte, u64 access_bits) + u64 *shadow_pte, u64 access_bits, gfn_t gfn) { ASSERT(*shadow_pte == 0); access_bits &= guest_pte; *shadow_pte = (guest_pte & PT_PTE_COPY_MASK); set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK, - guest_pte & PT_DIRTY_MASK, access_bits); + guest_pte & PT_DIRTY_MASK, access_bits, gfn); } static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde, - u64 *shadow_pte, u64 access_bits, - int index) + u64 *shadow_pte, u64 access_bits, gfn_t gfn) { gpa_t gaddr; ASSERT(*shadow_pte == 0); access_bits &= guest_pde; - gaddr = (guest_pde & PT_DIR_BASE_ADDR_MASK) + PAGE_SIZE * index; + gaddr = (gpa_t)gfn << PAGE_SHIFT; if (PTTYPE == 32 && is_cpuid_PSE36()) gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) << (32 - PT32_DIR_PSE36_SHIFT); *shadow_pte = guest_pde & PT_PTE_COPY_MASK; set_pte_common(vcpu, shadow_pte, gaddr, - guest_pde & PT_DIRTY_MASK, access_bits); -} - -/* - * Fetch a guest pte from a specific level in the paging hierarchy. - */ -static pt_element_t *FNAME(fetch_guest)(struct kvm_vcpu *vcpu, - struct guest_walker *walker, - int level, - gva_t addr) -{ - - ASSERT(level > 0 && level <= walker->level); - - for (;;) { - int index = PT_INDEX(addr, walker->level); - hpa_t paddr; - - ASSERT(((unsigned long)walker->table & PAGE_MASK) == - ((unsigned long)&walker->table[index] & PAGE_MASK)); - if (level == walker->level || - !is_present_pte(walker->table[index]) || - (walker->level == PT_DIRECTORY_LEVEL && - (walker->table[index] & PT_PAGE_SIZE_MASK) && - (PTTYPE == 64 || is_pse(vcpu)))) - return &walker->table[index]; - if (walker->level != 3 || is_long_mode(vcpu)) - walker->inherited_ar &= walker->table[index]; - paddr = safe_gpa_to_hpa(vcpu, walker->table[index] & PT_BASE_ADDR_MASK); - kunmap_atomic(walker->table, KM_USER0); - walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT), - KM_USER0); - --walker->level; - } + guest_pde & PT_DIRTY_MASK, access_bits, gfn); } /* @@ -150,15 +189,26 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, hpa_t shadow_addr; int level; u64 *prev_shadow_ent = NULL; + pt_element_t *guest_ent = walker->ptep; + + if (!is_present_pte(*guest_ent)) + return NULL; shadow_addr = vcpu->mmu.root_hpa; level = vcpu->mmu.shadow_root_level; + if (level == PT32E_ROOT_LEVEL) { + shadow_addr = vcpu->mmu.pae_root[(addr >> 30) & 3]; + shadow_addr &= PT64_BASE_ADDR_MASK; + --level; + } for (; ; level--) { u32 index = SHADOW_PT_INDEX(addr, level); u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index; - pt_element_t *guest_ent; + struct kvm_mmu_page *shadow_page; u64 shadow_pte; + int metaphysical; + gfn_t table_gfn; if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { if (level == PT_PAGE_TABLE_LEVEL) @@ -168,21 +218,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, continue; } - if (PTTYPE == 32 && level > PT32_ROOT_LEVEL) { - ASSERT(level == PT32E_ROOT_LEVEL); - guest_ent = FNAME(fetch_guest)(vcpu, walker, - PT32_ROOT_LEVEL, addr); - } else - guest_ent = FNAME(fetch_guest)(vcpu, walker, - level, addr); - - if (!is_present_pte(*guest_ent)) - return NULL; - - /* Don't set accessed bit on PAE PDPTRs */ - if (vcpu->mmu.root_level != 3 || walker->level != 3) - *guest_ent |= PT_ACCESSED_MASK; - if (level == PT_PAGE_TABLE_LEVEL) { if (walker->level == PT_DIRECTORY_LEVEL) { @@ -190,21 +225,30 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, *prev_shadow_ent |= PT_SHADOW_PS_MARK; FNAME(set_pde)(vcpu, *guest_ent, shadow_ent, walker->inherited_ar, - PT_INDEX(addr, PT_PAGE_TABLE_LEVEL)); + walker->gfn); } else { ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); - FNAME(set_pte)(vcpu, *guest_ent, shadow_ent, walker->inherited_ar); + FNAME(set_pte)(vcpu, *guest_ent, shadow_ent, + walker->inherited_ar, + walker->gfn); } return shadow_ent; } - shadow_addr = kvm_mmu_alloc_page(vcpu, shadow_ent); - if (!VALID_PAGE(shadow_addr)) - return ERR_PTR(-ENOMEM); - shadow_pte = shadow_addr | PT_PRESENT_MASK; - if (vcpu->mmu.root_level > 3 || level != 3) - shadow_pte |= PT_ACCESSED_MASK - | PT_WRITABLE_MASK | PT_USER_MASK; + if (level - 1 == PT_PAGE_TABLE_LEVEL + && walker->level == PT_DIRECTORY_LEVEL) { + metaphysical = 1; + table_gfn = (*guest_ent & PT_BASE_ADDR_MASK) + >> PAGE_SHIFT; + } else { + metaphysical = 0; + table_gfn = walker->table_gfn[level - 2]; + } + shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, + metaphysical, shadow_ent); + shadow_addr = shadow_page->page_hpa; + shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK + | PT_WRITABLE_MASK | PT_USER_MASK; *shadow_ent = shadow_pte; prev_shadow_ent = shadow_ent; } @@ -221,11 +265,13 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu, u64 *shadow_ent, struct guest_walker *walker, gva_t addr, - int user) + int user, + int *write_pt) { pt_element_t *guest_ent; int writable_shadow; gfn_t gfn; + struct kvm_mmu_page *page; if (is_writeble_pte(*shadow_ent)) return 0; @@ -250,17 +296,35 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu, *shadow_ent &= ~PT_USER_MASK; } - guest_ent = FNAME(fetch_guest)(vcpu, walker, PT_PAGE_TABLE_LEVEL, addr); + guest_ent = walker->ptep; if (!is_present_pte(*guest_ent)) { *shadow_ent = 0; return 0; } - gfn = (*guest_ent & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; + gfn = walker->gfn; + + if (user) { + /* + * Usermode page faults won't be for page table updates. + */ + while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { + pgprintk("%s: zap %lx %x\n", + __FUNCTION__, gfn, page->role.word); + kvm_mmu_zap_page(vcpu, page); + } + } else if (kvm_mmu_lookup_page(vcpu, gfn)) { + pgprintk("%s: found shadow page for %lx, marking ro\n", + __FUNCTION__, gfn); + *guest_ent |= PT_DIRTY_MASK; + *write_pt = 1; + return 0; + } mark_page_dirty(vcpu->kvm, gfn); *shadow_ent |= PT_WRITABLE_MASK; *guest_ent |= PT_DIRTY_MASK; + rmap_add(vcpu, shadow_ent); return 1; } @@ -276,7 +340,8 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu, * - normal guest page fault due to the guest pte marked not present, not * writable, or not executable * - * Returns: 1 if we need to emulate the instruction, 0 otherwise + * Returns: 1 if we need to emulate the instruction, 0 otherwise, or + * a negative value on error. */ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code) @@ -287,39 +352,47 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker walker; u64 *shadow_pte; int fixed; + int write_pt = 0; + int r; + + pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code); + kvm_mmu_audit(vcpu, "pre page fault"); + + r = mmu_topup_memory_caches(vcpu); + if (r) + return r; /* * Look up the shadow pte for the faulting address. */ - for (;;) { - FNAME(init_walker)(&walker, vcpu); - shadow_pte = FNAME(fetch)(vcpu, addr, &walker); - if (IS_ERR(shadow_pte)) { /* must be -ENOMEM */ - nonpaging_flush(vcpu); - FNAME(release_walker)(&walker); - continue; - } - break; - } + FNAME(walk_addr)(&walker, vcpu, addr); + shadow_pte = FNAME(fetch)(vcpu, addr, &walker); /* * The page is not mapped by the guest. Let the guest handle it. */ if (!shadow_pte) { + pgprintk("%s: not mapped\n", __FUNCTION__); inject_page_fault(vcpu, addr, error_code); FNAME(release_walker)(&walker); return 0; } + pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__, + shadow_pte, *shadow_pte); + /* * Update the shadow pte. */ if (write_fault) fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr, - user_fault); + user_fault, &write_pt); else fixed = fix_read_pf(shadow_pte); + pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__, + shadow_pte, *shadow_pte); + FNAME(release_walker)(&walker); /* @@ -331,20 +404,23 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, pgprintk("%s: io work, no access\n", __FUNCTION__); inject_page_fault(vcpu, addr, error_code | PFERR_PRESENT_MASK); + kvm_mmu_audit(vcpu, "post page fault (io)"); return 0; } /* * pte not present, guest page fault. */ - if (pte_present && !fixed) { + if (pte_present && !fixed && !write_pt) { inject_page_fault(vcpu, addr, error_code); + kvm_mmu_audit(vcpu, "post page fault (guest)"); return 0; } ++kvm_stat.pf_fixed; + kvm_mmu_audit(vcpu, "post page fault (fixed)"); - return 0; + return write_pt; } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) @@ -353,9 +429,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) pt_element_t guest_pte; gpa_t gpa; - FNAME(init_walker)(&walker, vcpu); - guest_pte = *FNAME(fetch_guest)(vcpu, &walker, PT_PAGE_TABLE_LEVEL, - vaddr); + FNAME(walk_addr)(&walker, vcpu, vaddr); + guest_pte = *walker.ptep; FNAME(release_walker)(&walker); if (!is_present_pte(guest_pte)) @@ -389,3 +464,4 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) #undef PT_PTE_COPY_MASK #undef PT_NON_PTE_COPY_MASK #undef PT_DIR_BASE_ADDR_MASK +#undef PT_MAX_FULL_LEVELS diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index fa042873571..ccc06b1b91b 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -235,6 +235,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip; vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; + + vcpu->interrupt_window_open = 1; } static int has_svm(void) @@ -495,7 +497,6 @@ static void init_vmcb(struct vmcb *vmcb) /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */ (1ULL << INTERCEPT_CPUID) | (1ULL << INTERCEPT_HLT) | - (1ULL << INTERCEPT_INVLPG) | (1ULL << INTERCEPT_INVLPGA) | (1ULL << INTERCEPT_IOIO_PROT) | (1ULL << INTERCEPT_MSR_PROT) | @@ -700,6 +701,10 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) vcpu->svm->vmcb->save.gdtr.base = dt->base ; } +static void svm_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu) +{ +} + static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { #ifdef CONFIG_X86_64 @@ -847,6 +852,7 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) u64 fault_address; u32 error_code; enum emulation_result er; + int r; if (is_external_interrupt(exit_int_info)) push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); @@ -855,7 +861,12 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) fault_address = vcpu->svm->vmcb->control.exit_info_2; error_code = vcpu->svm->vmcb->control.exit_info_1; - if (!vcpu->mmu.page_fault(vcpu, fault_address, error_code)) { + r = kvm_mmu_page_fault(vcpu, fault_address, error_code); + if (r < 0) { + spin_unlock(&vcpu->kvm->lock); + return r; + } + if (!r) { spin_unlock(&vcpu->kvm->lock); return 1; } @@ -1031,10 +1042,11 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; skip_emulated_instruction(vcpu); - if (vcpu->irq_summary && (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)) + if (vcpu->irq_summary) return 1; kvm_run->exit_reason = KVM_EXIT_HLT; + ++kvm_stat.halt_exits; return 0; } @@ -1186,6 +1198,23 @@ static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return rdmsr_interception(vcpu, kvm_run); } +static int interrupt_window_interception(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + /* + * If the user space waits to inject interrupts, exit as soon as + * possible + */ + if (kvm_run->request_interrupt_window && + !vcpu->irq_summary) { + ++kvm_stat.irq_window_exits; + kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; + return 0; + } + + return 1; +} + static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) = { [SVM_EXIT_READ_CR0] = emulate_on_interception, @@ -1210,6 +1239,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, [SVM_EXIT_NMI] = nop_on_interception, [SVM_EXIT_SMI] = nop_on_interception, [SVM_EXIT_INIT] = nop_on_interception, + [SVM_EXIT_VINTR] = interrupt_window_interception, /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ [SVM_EXIT_CPUID] = cpuid_interception, [SVM_EXIT_HLT] = halt_interception, @@ -1278,15 +1308,11 @@ static void pre_svm_run(struct kvm_vcpu *vcpu) } -static inline void kvm_try_inject_irq(struct kvm_vcpu *vcpu) +static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu) { struct vmcb_control_area *control; - if (!vcpu->irq_summary) - return; - control = &vcpu->svm->vmcb->control; - control->int_vector = pop_irq(vcpu); control->int_ctl &= ~V_INTR_PRIO_MASK; control->int_ctl |= V_IRQ_MASK | @@ -1301,6 +1327,59 @@ static void kvm_reput_irq(struct kvm_vcpu *vcpu) control->int_ctl &= ~V_IRQ_MASK; push_irq(vcpu, control->int_vector); } + + vcpu->interrupt_window_open = + !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); +} + +static void do_interrupt_requests(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + struct vmcb_control_area *control = &vcpu->svm->vmcb->control; + + vcpu->interrupt_window_open = + (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && + (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)); + + if (vcpu->interrupt_window_open && vcpu->irq_summary) + /* + * If interrupts enabled, and not blocked by sti or mov ss. Good. + */ + kvm_do_inject_irq(vcpu); + + /* + * Interrupts blocked. Wait for unblock. + */ + if (!vcpu->interrupt_window_open && + (vcpu->irq_summary || kvm_run->request_interrupt_window)) { + control->intercept |= 1ULL << INTERCEPT_VINTR; + } else + control->intercept &= ~(1ULL << INTERCEPT_VINTR); +} + +static void post_kvm_run_save(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && + vcpu->irq_summary == 0); + kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0; + kvm_run->cr8 = vcpu->cr8; + kvm_run->apic_base = vcpu->apic_base; +} + +/* + * Check if userspace requested an interrupt window, and that the + * interrupt window is open. + * + * No need to exit to userspace if we already have an interrupt queued. + */ +static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + return (!vcpu->irq_summary && + kvm_run->request_interrupt_window && + vcpu->interrupt_window_open && + (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)); } static void save_db_regs(unsigned long *db_regs) @@ -1324,9 +1403,10 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) u16 fs_selector; u16 gs_selector; u16 ldt_selector; + int r; again: - kvm_try_inject_irq(vcpu); + do_interrupt_requests(vcpu, kvm_run); clgi(); @@ -1487,18 +1567,28 @@ again: if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code; + post_kvm_run_save(vcpu, kvm_run); return 0; } - if (handle_exit(vcpu, kvm_run)) { + r = handle_exit(vcpu, kvm_run); + if (r > 0) { if (signal_pending(current)) { ++kvm_stat.signal_exits; + post_kvm_run_save(vcpu, kvm_run); + return -EINTR; + } + + if (dm_request_for_irq_injection(vcpu, kvm_run)) { + ++kvm_stat.request_irq_exits; + post_kvm_run_save(vcpu, kvm_run); return -EINTR; } kvm_resched(vcpu); goto again; } - return 0; + post_kvm_run_save(vcpu, kvm_run); + return r; } static void svm_flush_tlb(struct kvm_vcpu *vcpu) @@ -1565,6 +1655,7 @@ static struct kvm_arch_ops svm_arch_ops = { .get_segment = svm_get_segment, .set_segment = svm_set_segment, .get_cs_db_l_bits = svm_get_cs_db_l_bits, + .decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits, .set_cr0 = svm_set_cr0, .set_cr0_no_modeswitch = svm_set_cr0, .set_cr3 = svm_set_cr3, diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index d0a2c2d5342..d4701cb4c65 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -116,7 +116,7 @@ static void vmcs_clear(struct vmcs *vmcs) static void __vcpu_clear(void *arg) { struct kvm_vcpu *vcpu = arg; - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); if (vcpu->cpu == cpu) vmcs_clear(vcpu->vmcs); @@ -152,15 +152,21 @@ static u64 vmcs_read64(unsigned long field) #endif } +static noinline void vmwrite_error(unsigned long field, unsigned long value) +{ + printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", + field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); + dump_stack(); +} + static void vmcs_writel(unsigned long field, unsigned long value) { u8 error; asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0" : "=q"(error) : "a"(value), "d"(field) : "cc" ); - if (error) - printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", - field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); + if (unlikely(error)) + vmwrite_error(field, value); } static void vmcs_write16(unsigned long field, u16 value) @@ -263,6 +269,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) if (interruptibility & 3) vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility & ~3); + vcpu->interrupt_window_open = 1; } static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) @@ -541,7 +548,7 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) static struct vmcs *alloc_vmcs(void) { - return alloc_vmcs_cpu(smp_processor_id()); + return alloc_vmcs_cpu(raw_smp_processor_id()); } static void free_vmcs(struct vmcs *vmcs) @@ -736,6 +743,15 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif +static void vmx_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu) +{ + vcpu->cr0 &= KVM_GUEST_CR0_MASK; + vcpu->cr0 |= vmcs_readl(GUEST_CR0) & ~KVM_GUEST_CR0_MASK; + + vcpu->cr4 &= KVM_GUEST_CR4_MASK; + vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; +} + static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { if (vcpu->rmode.active && (cr0 & CR0_PE_MASK)) @@ -1011,8 +1027,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, 0xfff0); vmcs_writel(GUEST_RSP, 0); - vmcs_writel(GUEST_CR3, 0); - //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 vmcs_writel(GUEST_DR7, 0x400); @@ -1049,7 +1063,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */ | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */ | CPU_BASED_UNCOND_IO_EXITING /* 20.6.2 */ - | CPU_BASED_INVDPG_EXITING | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ ); @@ -1094,14 +1107,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) rdmsrl(MSR_IA32_SYSENTER_EIP, a); vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ - ret = -ENOMEM; - vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vcpu->guest_msrs) - goto out; - vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vcpu->host_msrs) - goto out_free_guest_msrs; - for (i = 0; i < NR_VMX_MSR; ++i) { u32 index = vmx_msr_index[i]; u32 data_low, data_high; @@ -1155,8 +1160,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) return 0; -out_free_guest_msrs: - kfree(vcpu->guest_msrs); out: return ret; } @@ -1224,21 +1227,34 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } -static void kvm_try_inject_irq(struct kvm_vcpu *vcpu) + +static void do_interrupt_requests(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) { - if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) - && (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0) + u32 cpu_based_vm_exec_control; + + vcpu->interrupt_window_open = + ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); + + if (vcpu->interrupt_window_open && + vcpu->irq_summary && + !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) /* - * Interrupts enabled, and not blocked by sti or mov ss. Good. + * If interrupts enabled, and not blocked by sti or mov ss. Good. */ kvm_do_inject_irq(vcpu); - else + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + if (!vcpu->interrupt_window_open && + (vcpu->irq_summary || kvm_run->request_interrupt_window)) /* * Interrupts blocked. Wait for unblock. */ - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, - vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) - | CPU_BASED_VIRTUAL_INTR_PENDING); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; + else + cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); } static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu) @@ -1277,6 +1293,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) unsigned long cr2, rip; u32 vect_info; enum emulation_result er; + int r; vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); intr_info = vmcs_read32(VM_EXIT_INTR_INFO); @@ -1305,7 +1322,12 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) cr2 = vmcs_readl(EXIT_QUALIFICATION); spin_lock(&vcpu->kvm->lock); - if (!vcpu->mmu.page_fault(vcpu, cr2, error_code)) { + r = kvm_mmu_page_fault(vcpu, cr2, error_code); + if (r < 0) { + spin_unlock(&vcpu->kvm->lock); + return r; + } + if (!r) { spin_unlock(&vcpu->kvm->lock); return 1; } @@ -1425,17 +1447,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } -static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - u64 address = vmcs_read64(EXIT_QUALIFICATION); - int instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - spin_lock(&vcpu->kvm->lock); - vcpu->mmu.inval_page(vcpu, address); - spin_unlock(&vcpu->kvm->lock); - vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) + instruction_length); - return 1; -} - static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u64 exit_qualification; @@ -1575,23 +1586,40 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } +static void post_kvm_run_save(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0; + kvm_run->cr8 = vcpu->cr8; + kvm_run->apic_base = vcpu->apic_base; + kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && + vcpu->irq_summary == 0); +} + static int handle_interrupt_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - /* Turn off interrupt window reporting. */ - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, - vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) - & ~CPU_BASED_VIRTUAL_INTR_PENDING); + /* + * If the user space waits to inject interrupts, exit as soon as + * possible + */ + if (kvm_run->request_interrupt_window && + !vcpu->irq_summary) { + kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; + ++kvm_stat.irq_window_exits; + return 0; + } return 1; } static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { skip_emulated_instruction(vcpu); - if (vcpu->irq_summary && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)) + if (vcpu->irq_summary) return 1; kvm_run->exit_reason = KVM_EXIT_HLT; + ++kvm_stat.halt_exits; return 0; } @@ -1605,7 +1633,6 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_EXCEPTION_NMI] = handle_exception, [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, [EXIT_REASON_IO_INSTRUCTION] = handle_io, - [EXIT_REASON_INVLPG] = handle_invlpg, [EXIT_REASON_CR_ACCESS] = handle_cr, [EXIT_REASON_DR_ACCESS] = handle_dr, [EXIT_REASON_CPUID] = handle_cpuid, @@ -1642,11 +1669,27 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return 0; } +/* + * Check if userspace requested an interrupt window, and that the + * interrupt window is open. + * + * No need to exit to userspace if we already have an interrupt queued. + */ +static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + return (!vcpu->irq_summary && + kvm_run->request_interrupt_window && + vcpu->interrupt_window_open && + (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); +} + static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u8 fail; u16 fs_sel, gs_sel, ldt_sel; int fs_gs_ldt_reload_needed; + int r; again: /* @@ -1673,9 +1716,7 @@ again: vmcs_writel(HOST_GS_BASE, segment_base(gs_sel)); #endif - if (vcpu->irq_summary && - !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) - kvm_try_inject_irq(vcpu); + do_interrupt_requests(vcpu, kvm_run); if (vcpu->guest_debug.enabled) kvm_guest_debug_pre(vcpu); @@ -1812,6 +1853,7 @@ again: fx_save(vcpu->guest_fx_image); fx_restore(vcpu->host_fx_image); + vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; #ifndef CONFIG_X86_64 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); @@ -1821,6 +1863,7 @@ again: if (fail) { kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR); + r = 0; } else { if (fs_gs_ldt_reload_needed) { load_ldt(ldt_sel); @@ -1840,17 +1883,28 @@ again: } vcpu->launched = 1; kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; - if (kvm_handle_exit(kvm_run, vcpu)) { + r = kvm_handle_exit(kvm_run, vcpu); + if (r > 0) { /* Give scheduler a change to reschedule. */ if (signal_pending(current)) { ++kvm_stat.signal_exits; + post_kvm_run_save(vcpu, kvm_run); + return -EINTR; + } + + if (dm_request_for_irq_injection(vcpu, kvm_run)) { + ++kvm_stat.request_irq_exits; + post_kvm_run_save(vcpu, kvm_run); return -EINTR; } + kvm_resched(vcpu); goto again; } } - return 0; + + post_kvm_run_save(vcpu, kvm_run); + return r; } static void vmx_flush_tlb(struct kvm_vcpu *vcpu) @@ -1906,13 +1960,33 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) { struct vmcs *vmcs; + vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vcpu->guest_msrs) + return -ENOMEM; + + vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vcpu->host_msrs) + goto out_free_guest_msrs; + vmcs = alloc_vmcs(); if (!vmcs) - return -ENOMEM; + goto out_free_msrs; + vmcs_clear(vmcs); vcpu->vmcs = vmcs; vcpu->launched = 0; + return 0; + +out_free_msrs: + kfree(vcpu->host_msrs); + vcpu->host_msrs = NULL; + +out_free_guest_msrs: + kfree(vcpu->guest_msrs); + vcpu->guest_msrs = NULL; + + return -ENOMEM; } static struct kvm_arch_ops vmx_arch_ops = { @@ -1936,6 +2010,7 @@ static struct kvm_arch_ops vmx_arch_ops = { .get_segment = vmx_get_segment, .set_segment = vmx_set_segment, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, + .decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits, .set_cr0 = vmx_set_cr0, .set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch, .set_cr3 = vmx_set_cr3, diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 1bff3e925fd..be70795b482 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -1323,7 +1323,7 @@ twobyte_special_insn: ctxt)) != 0)) goto done; if ((old_lo != _regs[VCPU_REGS_RAX]) - || (old_hi != _regs[VCPU_REGS_RDI])) { + || (old_hi != _regs[VCPU_REGS_RDX])) { _regs[VCPU_REGS_RAX] = old_lo; _regs[VCPU_REGS_RDX] = old_hi; _eflags &= ~EFLG_ZF; diff --git a/drivers/leds/leds-s3c24xx.c b/drivers/leds/leds-s3c24xx.c index fb1edc1c9ed..50914439d86 100644 --- a/drivers/leds/leds-s3c24xx.c +++ b/drivers/leds/leds-s3c24xx.c @@ -16,7 +16,7 @@ #include <linux/platform_device.h> #include <linux/leds.h> -#include <asm/arch/hardware.h> +#include <asm/hardware.h> #include <asm/arch/regs-gpio.h> #include <asm/arch/leds-gpio.h> diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index c8558d4ed50..8ca75e52f63 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -44,6 +44,7 @@ #include <linux/sysdev.h> #include <linux/freezer.h> #include <linux/syscalls.h> +#include <linux/suspend.h> #include <linux/cpu.h> #include <asm/prom.h> #include <asm/machdep.h> diff --git a/drivers/net/Space.c b/drivers/net/Space.c index 602ed31a5dd..9305eb9b1b9 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -349,22 +349,11 @@ static void __init trif_probe2(int unit) #endif -/* - * The loopback device is global so it can be directly referenced - * by the network code. Also, it must be first on device list. - */ -extern int loopback_init(void); - /* Statically configured drivers -- order matters here. */ static int __init net_olddevs_init(void) { int num; - if (loopback_init()) { - printk(KERN_ERR "Network loopback device setup failed\n"); - } - - #ifdef CONFIG_SBNI for (num = 0; num < 8; ++num) sbni_probe(num); diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index c26a4b8e552..ca2b21f9d44 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -154,8 +154,8 @@ static int ifb_xmit(struct sk_buff *skb, struct net_device *dev) int ret = 0; u32 from = G_TC_FROM(skb->tc_verd); - stats->tx_packets++; - stats->tx_bytes+=skb->len; + stats->rx_packets++; + stats->rx_bytes+=skb->len; if (!from || !skb->input_dev) { dropped: diff --git a/drivers/net/ixgb/ixgb.h b/drivers/net/ixgb/ixgb.h index 50ffe90488f..f4aba4355b1 100644 --- a/drivers/net/ixgb/ixgb.h +++ b/drivers/net/ixgb/ixgb.h @@ -171,6 +171,7 @@ struct ixgb_adapter { /* TX */ struct ixgb_desc_ring tx_ring ____cacheline_aligned_in_smp; + unsigned int restart_queue; unsigned long timeo_start; uint32_t tx_cmd_type; uint64_t hw_csum_tx_good; diff --git a/drivers/net/ixgb/ixgb_ethtool.c b/drivers/net/ixgb/ixgb_ethtool.c index cd22523fb03..82c044d6e08 100644 --- a/drivers/net/ixgb/ixgb_ethtool.c +++ b/drivers/net/ixgb/ixgb_ethtool.c @@ -79,6 +79,7 @@ static struct ixgb_stats ixgb_gstrings_stats[] = { {"tx_window_errors", IXGB_STAT(net_stats.tx_window_errors)}, {"tx_deferred_ok", IXGB_STAT(stats.dc)}, {"tx_timeout_count", IXGB_STAT(tx_timeout_count) }, + {"tx_restart_queue", IXGB_STAT(restart_queue) }, {"rx_long_length_errors", IXGB_STAT(stats.roc)}, {"rx_short_length_errors", IXGB_STAT(stats.ruc)}, #ifdef NETIF_F_TSO diff --git a/drivers/net/ixgb/ixgb_hw.c b/drivers/net/ixgb/ixgb_hw.c index 02089b64e42..ecbf45861c6 100644 --- a/drivers/net/ixgb/ixgb_hw.c +++ b/drivers/net/ixgb/ixgb_hw.c @@ -399,8 +399,9 @@ ixgb_init_rx_addrs(struct ixgb_hw *hw) /* Zero out the other 15 receive addresses. */ DEBUGOUT("Clearing RAR[1-15]\n"); for(i = 1; i < IXGB_RAR_ENTRIES; i++) { - IXGB_WRITE_REG_ARRAY(hw, RA, (i << 1), 0); + /* Write high reg first to disable the AV bit first */ IXGB_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0); + IXGB_WRITE_REG_ARRAY(hw, RA, (i << 1), 0); } return; diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index e628126c9c4..a083a918923 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -36,7 +36,7 @@ static char ixgb_driver_string[] = "Intel(R) PRO/10GbE Network Driver"; #else #define DRIVERNAPI "-NAPI" #endif -#define DRV_VERSION "1.0.117-k2"DRIVERNAPI +#define DRV_VERSION "1.0.126-k2"DRIVERNAPI char ixgb_driver_version[] = DRV_VERSION; static char ixgb_copyright[] = "Copyright (c) 1999-2006 Intel Corporation."; @@ -1287,6 +1287,7 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb, struct ixgb_buffer *buffer_info; int len = skb->len; unsigned int offset = 0, size, count = 0, i; + unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int nr_frags = skb_shinfo(skb)->nr_frags; unsigned int f; @@ -1298,6 +1299,11 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb, while(len) { buffer_info = &tx_ring->buffer_info[i]; size = min(len, IXGB_MAX_DATA_PER_TXD); + /* Workaround for premature desc write-backs + * in TSO mode. Append 4-byte sentinel desc */ + if (unlikely(mss && !nr_frags && size == len && size > 8)) + size -= 4; + buffer_info->length = size; WARN_ON(buffer_info->dma != 0); buffer_info->dma = @@ -1324,6 +1330,13 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb, while(len) { buffer_info = &tx_ring->buffer_info[i]; size = min(len, IXGB_MAX_DATA_PER_TXD); + + /* Workaround for premature desc write-backs + * in TSO mode. Append 4-byte sentinel desc */ + if (unlikely(mss && !nr_frags && size == len + && size > 8)) + size -= 4; + buffer_info->length = size; buffer_info->dma = pci_map_page(adapter->pdev, @@ -1398,11 +1411,43 @@ ixgb_tx_queue(struct ixgb_adapter *adapter, int count, int vlan_id,int tx_flags) IXGB_WRITE_REG(&adapter->hw, TDT, i); } +static int __ixgb_maybe_stop_tx(struct net_device *netdev, int size) +{ + struct ixgb_adapter *adapter = netdev_priv(netdev); + struct ixgb_desc_ring *tx_ring = &adapter->tx_ring; + + netif_stop_queue(netdev); + /* Herbert's original patch had: + * smp_mb__after_netif_stop_queue(); + * but since that doesn't exist yet, just open code it. */ + smp_mb(); + + /* We need to check again in a case another CPU has just + * made room available. */ + if (likely(IXGB_DESC_UNUSED(tx_ring) < size)) + return -EBUSY; + + /* A reprieve! */ + netif_start_queue(netdev); + ++adapter->restart_queue; + return 0; +} + +static int ixgb_maybe_stop_tx(struct net_device *netdev, + struct ixgb_desc_ring *tx_ring, int size) +{ + if (likely(IXGB_DESC_UNUSED(tx_ring) >= size)) + return 0; + return __ixgb_maybe_stop_tx(netdev, size); +} + + /* Tx Descriptors needed, worst case */ #define TXD_USE_COUNT(S) (((S) >> IXGB_MAX_TXD_PWR) + \ (((S) & (IXGB_MAX_DATA_PER_TXD - 1)) ? 1 : 0)) -#define DESC_NEEDED TXD_USE_COUNT(IXGB_MAX_DATA_PER_TXD) + \ - MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE) + 1 +#define DESC_NEEDED TXD_USE_COUNT(IXGB_MAX_DATA_PER_TXD) /* skb->date */ + \ + MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE) + 1 /* for context */ \ + + 1 /* one more needed for sentinel TSO workaround */ static int ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) @@ -1430,7 +1475,8 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) spin_lock_irqsave(&adapter->tx_lock, flags); #endif - if(unlikely(IXGB_DESC_UNUSED(&adapter->tx_ring) < DESC_NEEDED)) { + if (unlikely(ixgb_maybe_stop_tx(netdev, &adapter->tx_ring, + DESC_NEEDED))) { netif_stop_queue(netdev); spin_unlock_irqrestore(&adapter->tx_lock, flags); return NETDEV_TX_BUSY; @@ -1468,8 +1514,7 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) #ifdef NETIF_F_LLTX /* Make sure there is space in the ring for the next send. */ - if(unlikely(IXGB_DESC_UNUSED(&adapter->tx_ring) < DESC_NEEDED)) - netif_stop_queue(netdev); + ixgb_maybe_stop_tx(netdev, &adapter->tx_ring, DESC_NEEDED); spin_unlock_irqrestore(&adapter->tx_lock, flags); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 82c10dec1b5..2b739fd584f 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -229,9 +229,11 @@ struct net_device loopback_dev = { }; /* Setup and register the loopback device. */ -int __init loopback_init(void) +static int __init loopback_init(void) { return register_netdev(&loopback_dev); }; +module_init(loopback_init); + EXPORT_SYMBOL(loopback_dev); diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index d79d141a601..8844c20eac2 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -208,6 +208,15 @@ static void ql_write_common_reg(struct ql3_adapter *qdev, return; } +static void ql_write_nvram_reg(struct ql3_adapter *qdev, + u32 __iomem *reg, u32 value) +{ + writel(value, reg); + readl(reg); + udelay(1); + return; +} + static void ql_write_page0_reg(struct ql3_adapter *qdev, u32 __iomem *reg, u32 value) { @@ -336,9 +345,9 @@ static void fm93c56a_select(struct ql3_adapter *qdev) qdev->mem_map_registers; qdev->eeprom_cmd_data = AUBURN_EEPROM_CS_1; - ql_write_common_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, ISP_NVRAM_MASK | qdev->eeprom_cmd_data); - ql_write_common_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data)); } @@ -355,14 +364,14 @@ static void fm93c56a_cmd(struct ql3_adapter *qdev, u32 cmd, u32 eepromAddr) qdev->mem_map_registers; /* Clock in a zero, then do the start bit */ - ql_write_common_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, ISP_NVRAM_MASK | qdev->eeprom_cmd_data | AUBURN_EEPROM_DO_1); - ql_write_common_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, ISP_NVRAM_MASK | qdev-> eeprom_cmd_data | AUBURN_EEPROM_DO_1 | AUBURN_EEPROM_CLK_RISE); - ql_write_common_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, ISP_NVRAM_MASK | qdev-> eeprom_cmd_data | AUBURN_EEPROM_DO_1 | AUBURN_EEPROM_CLK_FALL); @@ -378,20 +387,20 @@ static void fm93c56a_cmd(struct ql3_adapter *qdev, u32 cmd, u32 eepromAddr) * If the bit changed, then change the DO state to * match */ - ql_write_common_reg(qdev, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs. serialPortInterfaceReg, ISP_NVRAM_MASK | qdev-> eeprom_cmd_data | dataBit); previousBit = dataBit; } - ql_write_common_reg(qdev, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs. serialPortInterfaceReg, ISP_NVRAM_MASK | qdev-> eeprom_cmd_data | dataBit | AUBURN_EEPROM_CLK_RISE); - ql_write_common_reg(qdev, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs. serialPortInterfaceReg, ISP_NVRAM_MASK | qdev-> @@ -412,20 +421,20 @@ static void fm93c56a_cmd(struct ql3_adapter *qdev, u32 cmd, u32 eepromAddr) * If the bit changed, then change the DO state to * match */ - ql_write_common_reg(qdev, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs. serialPortInterfaceReg, ISP_NVRAM_MASK | qdev-> eeprom_cmd_data | dataBit); previousBit = dataBit; } - ql_write_common_reg(qdev, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs. serialPortInterfaceReg, ISP_NVRAM_MASK | qdev-> eeprom_cmd_data | dataBit | AUBURN_EEPROM_CLK_RISE); - ql_write_common_reg(qdev, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs. serialPortInterfaceReg, ISP_NVRAM_MASK | qdev-> @@ -443,7 +452,7 @@ static void fm93c56a_deselect(struct ql3_adapter *qdev) struct ql3xxx_port_registers __iomem *port_regs = qdev->mem_map_registers; qdev->eeprom_cmd_data = AUBURN_EEPROM_CS_0; - ql_write_common_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, ISP_NVRAM_MASK | qdev->eeprom_cmd_data); } @@ -461,12 +470,12 @@ static void fm93c56a_datain(struct ql3_adapter *qdev, unsigned short *value) /* Read the data bits */ /* The first bit is a dummy. Clock right over it. */ for (i = 0; i < dataBits; i++) { - ql_write_common_reg(qdev, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs. serialPortInterfaceReg, ISP_NVRAM_MASK | qdev->eeprom_cmd_data | AUBURN_EEPROM_CLK_RISE); - ql_write_common_reg(qdev, + ql_write_nvram_reg(qdev, &port_regs->CommonRegs. serialPortInterfaceReg, ISP_NVRAM_MASK | qdev->eeprom_cmd_data | @@ -3370,7 +3379,6 @@ static int __devinit ql3xxx_probe(struct pci_dev *pdev, SET_MODULE_OWNER(ndev); SET_NETDEV_DEV(ndev, &pdev->dev); - ndev->features = NETIF_F_LLTX; if (pci_using_dac) ndev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 785e4a535f9..616be8d0fa8 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -90,7 +90,8 @@ #define ADVERTISE_MASK (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | \ SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \ - SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full) + SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full | \ + SUPPORTED_Pause | SUPPORTED_Autoneg) #define DRV_NAME "sungem" #define DRV_VERSION "0.98" diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c index 49800b25907..d21991ee88c 100644 --- a/drivers/net/sungem_phy.c +++ b/drivers/net/sungem_phy.c @@ -3,10 +3,9 @@ * * This file could be shared with other drivers. * - * (c) 2002, Benjamin Herrenscmidt (benh@kernel.crashing.org) + * (c) 2002-2007, Benjamin Herrenscmidt (benh@kernel.crashing.org) * * TODO: - * - Implement WOL * - Add support for PHYs that provide an IRQ line * - Eventually moved the entire polling state machine in * there (out of the eth driver), so that it can easily be @@ -152,6 +151,44 @@ static int bcm5221_suspend(struct mii_phy* phy) return 0; } +static int bcm5241_init(struct mii_phy* phy) +{ + u16 data; + + data = phy_read(phy, MII_BCM5221_TEST); + phy_write(phy, MII_BCM5221_TEST, + data | MII_BCM5221_TEST_ENABLE_SHADOWS); + + data = phy_read(phy, MII_BCM5221_SHDOW_AUX_STAT2); + phy_write(phy, MII_BCM5221_SHDOW_AUX_STAT2, + data | MII_BCM5221_SHDOW_AUX_STAT2_APD); + + data = phy_read(phy, MII_BCM5221_SHDOW_AUX_MODE4); + phy_write(phy, MII_BCM5221_SHDOW_AUX_MODE4, + data & ~MII_BCM5241_SHDOW_AUX_MODE4_STANDBYPWR); + + data = phy_read(phy, MII_BCM5221_TEST); + phy_write(phy, MII_BCM5221_TEST, + data & ~MII_BCM5221_TEST_ENABLE_SHADOWS); + + return 0; +} + +static int bcm5241_suspend(struct mii_phy* phy) +{ + u16 data; + + data = phy_read(phy, MII_BCM5221_TEST); + phy_write(phy, MII_BCM5221_TEST, + data | MII_BCM5221_TEST_ENABLE_SHADOWS); + + data = phy_read(phy, MII_BCM5221_SHDOW_AUX_MODE4); + phy_write(phy, MII_BCM5221_SHDOW_AUX_MODE4, + data | MII_BCM5241_SHDOW_AUX_MODE4_STANDBYPWR); + + return 0; +} + static int bcm5400_init(struct mii_phy* phy) { u16 data; @@ -373,6 +410,10 @@ static int bcm54xx_setup_aneg(struct mii_phy *phy, u32 advertise) adv |= ADVERTISE_100HALF; if (advertise & ADVERTISED_100baseT_Full) adv |= ADVERTISE_100FULL; + if (advertise & ADVERTISED_Pause) + adv |= ADVERTISE_PAUSE_CAP; + if (advertise & ADVERTISED_Asym_Pause) + adv |= ADVERTISE_PAUSE_ASYM; phy_write(phy, MII_ADVERTISE, adv); /* Setup 1000BT advertise */ @@ -436,12 +477,15 @@ static int bcm54xx_read_link(struct mii_phy *phy) val = phy_read(phy, MII_BCM5400_AUXSTATUS); link_mode = ((val & MII_BCM5400_AUXSTATUS_LINKMODE_MASK) >> MII_BCM5400_AUXSTATUS_LINKMODE_SHIFT); - phy->duplex = phy_BCM5400_link_table[link_mode][0] ? DUPLEX_FULL : DUPLEX_HALF; + phy->duplex = phy_BCM5400_link_table[link_mode][0] ? + DUPLEX_FULL : DUPLEX_HALF; phy->speed = phy_BCM5400_link_table[link_mode][2] ? SPEED_1000 : - (phy_BCM5400_link_table[link_mode][1] ? SPEED_100 : SPEED_10); + (phy_BCM5400_link_table[link_mode][1] ? + SPEED_100 : SPEED_10); val = phy_read(phy, MII_LPA); - phy->pause = ((val & LPA_PAUSE) != 0); + phy->pause = (phy->duplex == DUPLEX_FULL) && + ((val & LPA_PAUSE) != 0); } /* On non-aneg, we assume what we put in BMCR is the speed, * though magic-aneg shouldn't prevent this case from occurring @@ -450,6 +494,28 @@ static int bcm54xx_read_link(struct mii_phy *phy) return 0; } +static int marvell88e1111_init(struct mii_phy* phy) +{ + u16 rev; + + /* magic init sequence for rev 0 */ + rev = phy_read(phy, MII_PHYSID2) & 0x000f; + if (rev == 0) { + phy_write(phy, 0x1d, 0x000a); + phy_write(phy, 0x1e, 0x0821); + + phy_write(phy, 0x1d, 0x0006); + phy_write(phy, 0x1e, 0x8600); + + phy_write(phy, 0x1d, 0x000b); + phy_write(phy, 0x1e, 0x0100); + + phy_write(phy, 0x1d, 0x0004); + phy_write(phy, 0x1e, 0x4850); + } + return 0; +} + static int marvell_setup_aneg(struct mii_phy *phy, u32 advertise) { u16 ctl, adv; @@ -471,6 +537,10 @@ static int marvell_setup_aneg(struct mii_phy *phy, u32 advertise) adv |= ADVERTISE_100HALF; if (advertise & ADVERTISED_100baseT_Full) adv |= ADVERTISE_100FULL; + if (advertise & ADVERTISED_Pause) + adv |= ADVERTISE_PAUSE_CAP; + if (advertise & ADVERTISED_Asym_Pause) + adv |= ADVERTISE_PAUSE_ASYM; phy_write(phy, MII_ADVERTISE, adv); /* Setup 1000BT advertise & enable crossover detect @@ -549,7 +619,7 @@ static int marvell_setup_forced(struct mii_phy *phy, int speed, int fd) static int marvell_read_link(struct mii_phy *phy) { - u16 status; + u16 status, pmask; if (phy->autoneg) { status = phy_read(phy, MII_M1011_PHY_SPEC_STATUS); @@ -565,7 +635,9 @@ static int marvell_read_link(struct mii_phy *phy) phy->duplex = DUPLEX_FULL; else phy->duplex = DUPLEX_HALF; - phy->pause = 0; /* XXX Check against spec ! */ + pmask = MII_M1011_PHY_SPEC_STATUS_TX_PAUSE | + MII_M1011_PHY_SPEC_STATUS_RX_PAUSE; + phy->pause = (status & pmask) == pmask; } /* On non-aneg, we assume what we put in BMCR is the speed, * though magic-aneg shouldn't prevent this case from occurring @@ -595,6 +667,10 @@ static int genmii_setup_aneg(struct mii_phy *phy, u32 advertise) adv |= ADVERTISE_100HALF; if (advertise & ADVERTISED_100baseT_Full) adv |= ADVERTISE_100FULL; + if (advertise & ADVERTISED_Pause) + adv |= ADVERTISE_PAUSE_CAP; + if (advertise & ADVERTISED_Asym_Pause) + adv |= ADVERTISE_PAUSE_ASYM; phy_write(phy, MII_ADVERTISE, adv); /* Start/Restart aneg */ @@ -666,7 +742,8 @@ static int genmii_read_link(struct mii_phy *phy) phy->speed = SPEED_100; else phy->speed = SPEED_10; - phy->pause = 0; + phy->pause = (phy->duplex == DUPLEX_FULL) && + ((lpa & LPA_PAUSE) != 0); } /* On non-aneg, we assume what we put in BMCR is the speed, * though magic-aneg shouldn't prevent this case from occurring @@ -676,11 +753,19 @@ static int genmii_read_link(struct mii_phy *phy) } -#define MII_BASIC_FEATURES (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | \ - SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \ - SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII) -#define MII_GBIT_FEATURES (MII_BASIC_FEATURES | \ - SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full) +#define MII_BASIC_FEATURES \ + (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | \ + SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \ + SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII | \ + SUPPORTED_Pause) + +/* On gigabit capable PHYs, we advertise Pause support but not asym pause + * support for now as I'm not sure it's supported and Darwin doesn't do + * it neither. --BenH. + */ +#define MII_GBIT_FEATURES \ + (MII_BASIC_FEATURES | \ + SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full) /* Broadcom BCM 5201 */ static struct mii_phy_ops bcm5201_phy_ops = { @@ -720,6 +805,24 @@ static struct mii_phy_def bcm5221_phy_def = { .ops = &bcm5221_phy_ops }; +/* Broadcom BCM 5241 */ +static struct mii_phy_ops bcm5241_phy_ops = { + .suspend = bcm5241_suspend, + .init = bcm5241_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link, +}; +static struct mii_phy_def bcm5241_phy_def = { + .phy_id = 0x0143bc30, + .phy_id_mask = 0xfffffff0, + .name = "BCM5241", + .features = MII_BASIC_FEATURES, + .magic_aneg = 1, + .ops = &bcm5241_phy_ops +}; + /* Broadcom BCM 5400 */ static struct mii_phy_ops bcm5400_phy_ops = { .init = bcm5400_init, @@ -854,11 +957,8 @@ static struct mii_phy_def bcm5462V_phy_def = { .ops = &bcm5462V_phy_ops }; -/* Marvell 88E1101 (Apple seem to deal with 2 different revs, - * I masked out the 8 last bits to get both, but some specs - * would be useful here) --BenH. - */ -static struct mii_phy_ops marvell_phy_ops = { +/* Marvell 88E1101 amd 88E1111 */ +static struct mii_phy_ops marvell88e1101_phy_ops = { .suspend = generic_suspend, .setup_aneg = marvell_setup_aneg, .setup_forced = marvell_setup_forced, @@ -866,13 +966,41 @@ static struct mii_phy_ops marvell_phy_ops = { .read_link = marvell_read_link }; -static struct mii_phy_def marvell_phy_def = { - .phy_id = 0x01410c00, - .phy_id_mask = 0xffffff00, - .name = "Marvell 88E1101", +static struct mii_phy_ops marvell88e1111_phy_ops = { + .init = marvell88e1111_init, + .suspend = generic_suspend, + .setup_aneg = marvell_setup_aneg, + .setup_forced = marvell_setup_forced, + .poll_link = genmii_poll_link, + .read_link = marvell_read_link +}; + +/* two revs in darwin for the 88e1101 ... I could use a datasheet + * to get the proper names... + */ +static struct mii_phy_def marvell88e1101v1_phy_def = { + .phy_id = 0x01410c20, + .phy_id_mask = 0xfffffff0, + .name = "Marvell 88E1101v1", + .features = MII_GBIT_FEATURES, + .magic_aneg = 1, + .ops = &marvell88e1101_phy_ops +}; +static struct mii_phy_def marvell88e1101v2_phy_def = { + .phy_id = 0x01410c60, + .phy_id_mask = 0xfffffff0, + .name = "Marvell 88E1101v2", + .features = MII_GBIT_FEATURES, + .magic_aneg = 1, + .ops = &marvell88e1101_phy_ops +}; +static struct mii_phy_def marvell88e1111_phy_def = { + .phy_id = 0x01410cc0, + .phy_id_mask = 0xfffffff0, + .name = "Marvell 88E1111", .features = MII_GBIT_FEATURES, .magic_aneg = 1, - .ops = &marvell_phy_ops + .ops = &marvell88e1111_phy_ops }; /* Generic implementation for most 10/100 PHYs */ @@ -895,6 +1023,7 @@ static struct mii_phy_def genmii_phy_def = { static struct mii_phy_def* mii_phy_table[] = { &bcm5201_phy_def, &bcm5221_phy_def, + &bcm5241_phy_def, &bcm5400_phy_def, &bcm5401_phy_def, &bcm5411_phy_def, @@ -902,7 +1031,9 @@ static struct mii_phy_def* mii_phy_table[] = { &bcm5421k2_phy_def, &bcm5461_phy_def, &bcm5462V_phy_def, - &marvell_phy_def, + &marvell88e1101v1_phy_def, + &marvell88e1101v2_phy_def, + &marvell88e1111_phy_def, &genmii_phy_def, NULL }; diff --git a/drivers/net/sungem_phy.h b/drivers/net/sungem_phy.h index 8ee1ca0471c..1d70ba6f9f1 100644 --- a/drivers/net/sungem_phy.h +++ b/drivers/net/sungem_phy.h @@ -30,7 +30,7 @@ struct mii_phy_def struct mii_phy { struct mii_phy_def* def; - int advertising; + u32 advertising; int mii_id; /* 1: autoneg enabled, 0: disabled */ @@ -85,6 +85,9 @@ extern int mii_phy_probe(struct mii_phy *phy, int mii_id); #define MII_BCM5221_SHDOW_AUX_MODE4_IDDQMODE 0x0001 #define MII_BCM5221_SHDOW_AUX_MODE4_CLKLOPWR 0x0004 +/* MII BCM5241 Additional registers */ +#define MII_BCM5241_SHDOW_AUX_MODE4_STANDBYPWR 0x0008 + /* MII BCM5400 1000-BASET Control register */ #define MII_BCM5400_GB_CONTROL 0x09 #define MII_BCM5400_GB_CONTROL_FULLDUPLEXCAP 0x0200 @@ -115,5 +118,7 @@ extern int mii_phy_probe(struct mii_phy *phy, int mii_id); #define MII_M1011_PHY_SPEC_STATUS_SPD_MASK 0xc000 #define MII_M1011_PHY_SPEC_STATUS_FULLDUPLEX 0x2000 #define MII_M1011_PHY_SPEC_STATUS_RESOLVED 0x0800 +#define MII_M1011_PHY_SPEC_STATUS_TX_PAUSE 0x0008 +#define MII_M1011_PHY_SPEC_STATUS_RX_PAUSE 0x0004 #endif /* __SUNGEM_PHY_H__ */ diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index f1dd81a1d59..3cfb0a3575e 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -19,7 +19,7 @@ config PCI_MSI config PCI_MULTITHREAD_PROBE bool "PCI Multi-threaded probe (EXPERIMENTAL)" - depends on PCI && EXPERIMENTAL + depends on PCI && EXPERIMENTAL && BROKEN help Say Y here if you want the PCI core to spawn a new thread for every PCI device that is probed. This can cause a huge diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 45f2b20ef51..fab381ed853 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -193,6 +193,18 @@ static struct pci_dev * pci_find_subsys(unsigned int vendor, struct pci_dev *dev; WARN_ON(in_interrupt()); + + /* + * pci_find_subsys() can be called on the ide_setup() path, super-early + * in boot. But the down_read() will enable local interrupts, which + * can cause some machines to crash. So here we detect and flag that + * situation and bail out early. + */ + if (unlikely(list_empty(&pci_devices))) { + printk(KERN_INFO "pci_find_subsys() called while pci_devices " + "is still empty\n"); + return NULL; + } down_read(&pci_bus_sem); n = from ? from->global_list.next : pci_devices.next; @@ -259,6 +271,18 @@ pci_get_subsys(unsigned int vendor, unsigned int device, struct pci_dev *dev; WARN_ON(in_interrupt()); + + /* + * pci_get_subsys() can potentially be called by drivers super-early + * in boot. But the down_read() will enable local interrupts, which + * can cause some machines to crash. So here we detect and flag that + * situation and bail out early. + */ + if (unlikely(list_empty(&pci_devices))) { + printk(KERN_NOTICE "pci_get_subsys() called while pci_devices " + "is still empty\n"); + return NULL; + } down_read(&pci_bus_sem); n = from ? from->global_list.next : pci_devices.next; diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 4f654c901c6..a724ab49a79 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -33,6 +33,8 @@ #include <asm/mach/time.h> +#include <asm/arch/at91_rtc.h> + #define AT91_RTC_FREQ 1 #define AT91_RTC_EPOCH 1900UL /* just like arch/arm/common/rtctime.c */ diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index 1460f6b769f..e7851e3739a 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -1,5 +1,5 @@ /* - * An I2C driver for the Ricoh RS5C372 RTC + * An I2C driver for Ricoh RS5C372 and RV5C38[67] RTCs * * Copyright (C) 2005 Pavel Mironchik <pmironchik@optifacio.net> * Copyright (C) 2006 Tower Technologies @@ -13,7 +13,7 @@ #include <linux/rtc.h> #include <linux/bcd.h> -#define DRV_VERSION "0.3" +#define DRV_VERSION "0.4" /* Addresses to scan */ static unsigned short normal_i2c[] = { /* 0x32,*/ I2C_CLIENT_END }; @@ -21,6 +21,13 @@ static unsigned short normal_i2c[] = { /* 0x32,*/ I2C_CLIENT_END }; /* Insmod parameters */ I2C_CLIENT_INSMOD; + +/* + * Ricoh has a family of I2C based RTCs, which differ only slightly from + * each other. Differences center on pinout (e.g. how many interrupts, + * output clock, etc) and how the control registers are used. The '372 + * is significant only because that's the one this driver first supported. + */ #define RS5C372_REG_SECS 0 #define RS5C372_REG_MINS 1 #define RS5C372_REG_HOURS 2 @@ -29,59 +36,142 @@ I2C_CLIENT_INSMOD; #define RS5C372_REG_MONTH 5 #define RS5C372_REG_YEAR 6 #define RS5C372_REG_TRIM 7 +# define RS5C372_TRIM_XSL 0x80 +# define RS5C372_TRIM_MASK 0x7F + +#define RS5C_REG_ALARM_A_MIN 8 /* or ALARM_W */ +#define RS5C_REG_ALARM_A_HOURS 9 +#define RS5C_REG_ALARM_A_WDAY 10 + +#define RS5C_REG_ALARM_B_MIN 11 /* or ALARM_D */ +#define RS5C_REG_ALARM_B_HOURS 12 +#define RS5C_REG_ALARM_B_WDAY 13 /* (ALARM_B only) */ + +#define RS5C_REG_CTRL1 14 +# define RS5C_CTRL1_AALE (1 << 7) /* or WALE */ +# define RS5C_CTRL1_BALE (1 << 6) /* or DALE */ +# define RV5C387_CTRL1_24 (1 << 5) +# define RS5C372A_CTRL1_SL1 (1 << 5) +# define RS5C_CTRL1_CT_MASK (7 << 0) +# define RS5C_CTRL1_CT0 (0 << 0) /* no periodic irq */ +# define RS5C_CTRL1_CT4 (4 << 0) /* 1 Hz level irq */ +#define RS5C_REG_CTRL2 15 +# define RS5C372_CTRL2_24 (1 << 5) +# define RS5C_CTRL2_XSTP (1 << 4) +# define RS5C_CTRL2_CTFG (1 << 2) +# define RS5C_CTRL2_AAFG (1 << 1) /* or WAFG */ +# define RS5C_CTRL2_BAFG (1 << 0) /* or DAFG */ + + +/* to read (style 1) or write registers starting at R */ +#define RS5C_ADDR(R) (((R) << 4) | 0) + + +enum rtc_type { + rtc_undef = 0, + rtc_rs5c372a, + rtc_rs5c372b, + rtc_rv5c386, + rtc_rv5c387a, +}; -#define RS5C372_TRIM_XSL 0x80 -#define RS5C372_TRIM_MASK 0x7F +/* REVISIT: this assumes that: + * - we're in the 21st century, so it's safe to ignore the century + * bit for rv5c38[67] (REG_MONTH bit 7); + * - we should use ALARM_A not ALARM_B (may be wrong on some boards) + */ +struct rs5c372 { + struct i2c_client *client; + struct rtc_device *rtc; + enum rtc_type type; + unsigned time24:1; + unsigned has_irq:1; + char buf[17]; + char *regs; + + /* on conversion to a "new style" i2c driver, this vanishes */ + struct i2c_client dev; +}; -#define RS5C372_REG_BASE 0 +static int rs5c_get_regs(struct rs5c372 *rs5c) +{ + struct i2c_client *client = rs5c->client; + struct i2c_msg msgs[] = { + { client->addr, I2C_M_RD, sizeof rs5c->buf, rs5c->buf }, + }; + + /* This implements the third reading method from the datasheet, using + * an internal address that's reset after each transaction (by STOP) + * to 0x0f ... so we read extra registers, and skip the first one. + * + * The first method doesn't work with the iop3xx adapter driver, on at + * least 80219 chips; this works around that bug. + */ + if ((i2c_transfer(client->adapter, msgs, 1)) != 1) { + pr_debug("%s: can't read registers\n", rs5c->rtc->name); + return -EIO; + } -static int rs5c372_attach(struct i2c_adapter *adapter); -static int rs5c372_detach(struct i2c_client *client); -static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind); + dev_dbg(&client->dev, + "%02x %02x %02x (%02x) %02x %02x %02x (%02x), " + "%02x %02x %02x, %02x %02x %02x; %02x %02x\n", + rs5c->regs[0], rs5c->regs[1], rs5c->regs[2], rs5c->regs[3], + rs5c->regs[4], rs5c->regs[5], rs5c->regs[6], rs5c->regs[7], + rs5c->regs[8], rs5c->regs[9], rs5c->regs[10], rs5c->regs[11], + rs5c->regs[12], rs5c->regs[13], rs5c->regs[14], rs5c->regs[15]); -struct rs5c372 { - u8 reg_addr; - u8 regs[17]; - struct i2c_msg msg[1]; - struct i2c_client client; - struct rtc_device *rtc; -}; + return 0; +} -static struct i2c_driver rs5c372_driver = { - .driver = { - .name = "rs5c372", - }, - .attach_adapter = &rs5c372_attach, - .detach_client = &rs5c372_detach, -}; +static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg) +{ + unsigned hour; -static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm) + if (rs5c->time24) + return BCD2BIN(reg & 0x3f); + + hour = BCD2BIN(reg & 0x1f); + if (hour == 12) + hour = 0; + if (reg & 0x20) + hour += 12; + return hour; +} + +static unsigned rs5c_hr2reg(struct rs5c372 *rs5c, unsigned hour) { + if (rs5c->time24) + return BIN2BCD(hour); + + if (hour > 12) + return 0x20 | BIN2BCD(hour - 12); + if (hour == 12) + return 0x20 | BIN2BCD(12); + if (hour == 0) + return BIN2BCD(12); + return BIN2BCD(hour); +} - struct rs5c372 *rs5c372 = i2c_get_clientdata(client); - u8 *buf = &(rs5c372->regs[1]); +static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm) +{ + struct rs5c372 *rs5c = i2c_get_clientdata(client); + int status = rs5c_get_regs(rs5c); - /* this implements the 3rd reading method, according - * to the datasheet. rs5c372 defaults to internal - * address 0xF, so 0x0 is in regs[1] - */ + if (status < 0) + return status; - if ((i2c_transfer(client->adapter, rs5c372->msg, 1)) != 1) { - dev_err(&client->dev, "%s: read error\n", __FUNCTION__); - return -EIO; - } + tm->tm_sec = BCD2BIN(rs5c->regs[RS5C372_REG_SECS] & 0x7f); + tm->tm_min = BCD2BIN(rs5c->regs[RS5C372_REG_MINS] & 0x7f); + tm->tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C372_REG_HOURS]); - tm->tm_sec = BCD2BIN(buf[RS5C372_REG_SECS] & 0x7f); - tm->tm_min = BCD2BIN(buf[RS5C372_REG_MINS] & 0x7f); - tm->tm_hour = BCD2BIN(buf[RS5C372_REG_HOURS] & 0x3f); - tm->tm_wday = BCD2BIN(buf[RS5C372_REG_WDAY] & 0x07); - tm->tm_mday = BCD2BIN(buf[RS5C372_REG_DAY] & 0x3f); + tm->tm_wday = BCD2BIN(rs5c->regs[RS5C372_REG_WDAY] & 0x07); + tm->tm_mday = BCD2BIN(rs5c->regs[RS5C372_REG_DAY] & 0x3f); /* tm->tm_mon is zero-based */ - tm->tm_mon = BCD2BIN(buf[RS5C372_REG_MONTH] & 0x1f) - 1; + tm->tm_mon = BCD2BIN(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1; /* year is 1900 + tm->tm_year */ - tm->tm_year = BCD2BIN(buf[RS5C372_REG_YEAR]) + 100; + tm->tm_year = BCD2BIN(rs5c->regs[RS5C372_REG_YEAR]) + 100; dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, " "mday=%d, mon=%d, year=%d, wday=%d\n", @@ -89,22 +179,25 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm) tm->tm_sec, tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday); - return 0; + /* rtc might need initialization */ + return rtc_valid_tm(tm); } static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm) { - unsigned char buf[8] = { RS5C372_REG_BASE }; + struct rs5c372 *rs5c = i2c_get_clientdata(client); + unsigned char buf[8]; - dev_dbg(&client->dev, - "%s: secs=%d, mins=%d, hours=%d " + dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d " "mday=%d, mon=%d, year=%d, wday=%d\n", - __FUNCTION__, tm->tm_sec, tm->tm_min, tm->tm_hour, + __FUNCTION__, + tm->tm_sec, tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday); + buf[0] = RS5C_ADDR(RS5C372_REG_SECS); buf[1] = BIN2BCD(tm->tm_sec); buf[2] = BIN2BCD(tm->tm_min); - buf[3] = BIN2BCD(tm->tm_hour); + buf[3] = rs5c_hr2reg(rs5c, tm->tm_hour); buf[4] = BIN2BCD(tm->tm_wday); buf[5] = BIN2BCD(tm->tm_mday); buf[6] = BIN2BCD(tm->tm_mon + 1); @@ -118,21 +211,43 @@ static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm) return 0; } +#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE) +#define NEED_TRIM +#endif + +#if defined(CONFIG_RTC_INTF_SYSFS) || defined(CONFIG_RTC_INTF_SYSFS_MODULE) +#define NEED_TRIM +#endif + +#ifdef NEED_TRIM static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim) { struct rs5c372 *rs5c372 = i2c_get_clientdata(client); - u8 tmp = rs5c372->regs[RS5C372_REG_TRIM + 1]; + u8 tmp = rs5c372->regs[RS5C372_REG_TRIM]; if (osc) *osc = (tmp & RS5C372_TRIM_XSL) ? 32000 : 32768; if (trim) { - *trim = tmp & RS5C372_TRIM_MASK; - dev_dbg(&client->dev, "%s: raw trim=%x\n", __FUNCTION__, *trim); + dev_dbg(&client->dev, "%s: raw trim=%x\n", __FUNCTION__, tmp); + tmp &= RS5C372_TRIM_MASK; + if (tmp & 0x3e) { + int t = tmp & 0x3f; + + if (tmp & 0x40) + t = (~t | (s8)0xc0) + 1; + else + t = t - 1; + + tmp = t * 2; + } else + tmp = 0; + *trim = tmp; } return 0; } +#endif static int rs5c372_rtc_read_time(struct device *dev, struct rtc_time *tm) { @@ -144,25 +259,190 @@ static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm) return rs5c372_set_datetime(to_i2c_client(dev), tm); } +#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE) + +static int +rs5c_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) +{ + struct i2c_client *client = to_i2c_client(dev); + struct rs5c372 *rs5c = i2c_get_clientdata(client); + unsigned char buf[2]; + int status; + + buf[1] = rs5c->regs[RS5C_REG_CTRL1]; + switch (cmd) { + case RTC_UIE_OFF: + case RTC_UIE_ON: + /* some 327a modes use a different IRQ pin for 1Hz irqs */ + if (rs5c->type == rtc_rs5c372a + && (buf[1] & RS5C372A_CTRL1_SL1)) + return -ENOIOCTLCMD; + case RTC_AIE_OFF: + case RTC_AIE_ON: + /* these irq management calls only make sense for chips + * which are wired up to an IRQ. + */ + if (!rs5c->has_irq) + return -ENOIOCTLCMD; + break; + default: + return -ENOIOCTLCMD; + } + + status = rs5c_get_regs(rs5c); + if (status < 0) + return status; + + buf[0] = RS5C_ADDR(RS5C_REG_CTRL1); + switch (cmd) { + case RTC_AIE_OFF: /* alarm off */ + buf[1] &= ~RS5C_CTRL1_AALE; + break; + case RTC_AIE_ON: /* alarm on */ + buf[1] |= RS5C_CTRL1_AALE; + break; + case RTC_UIE_OFF: /* update off */ + buf[1] &= ~RS5C_CTRL1_CT_MASK; + break; + case RTC_UIE_ON: /* update on */ + buf[1] &= ~RS5C_CTRL1_CT_MASK; + buf[1] |= RS5C_CTRL1_CT4; + break; + } + if ((i2c_master_send(client, buf, 2)) != 2) { + printk(KERN_WARNING "%s: can't update alarm\n", + rs5c->rtc->name); + status = -EIO; + } else + rs5c->regs[RS5C_REG_CTRL1] = buf[1]; + return status; +} + +#else +#define rs5c_rtc_ioctl NULL +#endif + + +/* NOTE: Since RTC_WKALM_{RD,SET} were originally defined for EFI, + * which only exposes a polled programming interface; and since + * these calls map directly to those EFI requests; we don't demand + * we have an IRQ for this chip when we go through this API. + * + * The older x86_pc derived RTC_ALM_{READ,SET} calls require irqs + * though, managed through RTC_AIE_{ON,OFF} requests. + */ + +static int rs5c_read_alarm(struct device *dev, struct rtc_wkalrm *t) +{ + struct i2c_client *client = to_i2c_client(dev); + struct rs5c372 *rs5c = i2c_get_clientdata(client); + int status; + + status = rs5c_get_regs(rs5c); + if (status < 0) + return status; + + /* report alarm time */ + t->time.tm_sec = 0; + t->time.tm_min = BCD2BIN(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f); + t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]); + t->time.tm_mday = -1; + t->time.tm_mon = -1; + t->time.tm_year = -1; + t->time.tm_wday = -1; + t->time.tm_yday = -1; + t->time.tm_isdst = -1; + + /* ... and status */ + t->enabled = !!(rs5c->regs[RS5C_REG_CTRL1] & RS5C_CTRL1_AALE); + t->pending = !!(rs5c->regs[RS5C_REG_CTRL2] & RS5C_CTRL2_AAFG); + + return 0; +} + +static int rs5c_set_alarm(struct device *dev, struct rtc_wkalrm *t) +{ + struct i2c_client *client = to_i2c_client(dev); + struct rs5c372 *rs5c = i2c_get_clientdata(client); + int status; + unsigned char buf[4]; + + /* only handle up to 24 hours in the future, like RTC_ALM_SET */ + if (t->time.tm_mday != -1 + || t->time.tm_mon != -1 + || t->time.tm_year != -1) + return -EINVAL; + + /* REVISIT: round up tm_sec */ + + /* if needed, disable irq (clears pending status) */ + status = rs5c_get_regs(rs5c); + if (status < 0) + return status; + if (rs5c->regs[RS5C_REG_CTRL1] & RS5C_CTRL1_AALE) { + buf[0] = RS5C_ADDR(RS5C_REG_CTRL1); + buf[1] = rs5c->regs[RS5C_REG_CTRL1] & ~RS5C_CTRL1_AALE; + if (i2c_master_send(client, buf, 2) != 2) { + pr_debug("%s: can't disable alarm\n", rs5c->rtc->name); + return -EIO; + } + rs5c->regs[RS5C_REG_CTRL1] = buf[1]; + } + + /* set alarm */ + buf[0] = RS5C_ADDR(RS5C_REG_ALARM_A_MIN); + buf[1] = BIN2BCD(t->time.tm_min); + buf[2] = rs5c_hr2reg(rs5c, t->time.tm_hour); + buf[3] = 0x7f; /* any/all days */ + if ((i2c_master_send(client, buf, 4)) != 4) { + pr_debug("%s: can't set alarm time\n", rs5c->rtc->name); + return -EIO; + } + + /* ... and maybe enable its irq */ + if (t->enabled) { + buf[0] = RS5C_ADDR(RS5C_REG_CTRL1); + buf[1] = rs5c->regs[RS5C_REG_CTRL1] | RS5C_CTRL1_AALE; + if ((i2c_master_send(client, buf, 2)) != 2) + printk(KERN_WARNING "%s: can't enable alarm\n", + rs5c->rtc->name); + rs5c->regs[RS5C_REG_CTRL1] = buf[1]; + } + + return 0; +} + +#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE) + static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq) { int err, osc, trim; err = rs5c372_get_trim(to_i2c_client(dev), &osc, &trim); if (err == 0) { - seq_printf(seq, "%d.%03d KHz\n", osc / 1000, osc % 1000); - seq_printf(seq, "trim\t: %d\n", trim); + seq_printf(seq, "crystal\t\t: %d.%03d KHz\n", + osc / 1000, osc % 1000); + seq_printf(seq, "trim\t\t: %d\n", trim); } return 0; } +#else +#define rs5c372_rtc_proc NULL +#endif + static const struct rtc_class_ops rs5c372_rtc_ops = { .proc = rs5c372_rtc_proc, + .ioctl = rs5c_rtc_ioctl, .read_time = rs5c372_rtc_read_time, .set_time = rs5c372_rtc_set_time, + .read_alarm = rs5c_read_alarm, + .set_alarm = rs5c_set_alarm, }; +#if defined(CONFIG_RTC_INTF_SYSFS) || defined(CONFIG_RTC_INTF_SYSFS_MODULE) + static ssize_t rs5c372_sysfs_show_trim(struct device *dev, struct device_attribute *attr, char *buf) { @@ -172,7 +452,7 @@ static ssize_t rs5c372_sysfs_show_trim(struct device *dev, if (err) return err; - return sprintf(buf, "0x%2x\n", trim); + return sprintf(buf, "%d\n", trim); } static DEVICE_ATTR(trim, S_IRUGO, rs5c372_sysfs_show_trim, NULL); @@ -189,16 +469,35 @@ static ssize_t rs5c372_sysfs_show_osc(struct device *dev, } static DEVICE_ATTR(osc, S_IRUGO, rs5c372_sysfs_show_osc, NULL); -static int rs5c372_attach(struct i2c_adapter *adapter) +static int rs5c_sysfs_register(struct device *dev) { - return i2c_probe(adapter, &addr_data, rs5c372_probe); + int err; + + err = device_create_file(dev, &dev_attr_trim); + if (err) + return err; + err = device_create_file(dev, &dev_attr_osc); + if (err) + device_remove_file(dev, &dev_attr_trim); + + return err; +} + +#else +static int rs5c_sysfs_register(struct device *dev) +{ + return 0; } +#endif /* SYSFS */ + +static struct i2c_driver rs5c372_driver; static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind) { int err = 0; struct i2c_client *client; struct rs5c372 *rs5c372; + struct rtc_time tm; dev_dbg(adapter->class_dev.dev, "%s\n", __FUNCTION__); @@ -211,7 +510,15 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind) err = -ENOMEM; goto exit; } - client = &rs5c372->client; + + /* we read registers 0x0f then 0x00-0x0f; skip the first one */ + rs5c372->regs=&rs5c372->buf[1]; + + /* On conversion to a "new style" i2c driver, we'll be handed + * the i2c_client (we won't create it) + */ + client = &rs5c372->dev; + rs5c372->client = client; /* I2C client */ client->addr = address; @@ -222,16 +529,99 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind) i2c_set_clientdata(client, rs5c372); - rs5c372->msg[0].addr = address; - rs5c372->msg[0].flags = I2C_M_RD; - rs5c372->msg[0].len = sizeof(rs5c372->regs); - rs5c372->msg[0].buf = rs5c372->regs; - /* Inform the i2c layer */ if ((err = i2c_attach_client(client))) goto exit_kfree; - dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); + err = rs5c_get_regs(rs5c372); + if (err < 0) + goto exit_detach; + + /* For "new style" drivers, irq is in i2c_client and chip type + * info comes from i2c_client.dev.platform_data. Meanwhile: + * + * STICK BOARD-SPECIFIC SETUP CODE RIGHT HERE + */ + if (rs5c372->type == rtc_undef) { + rs5c372->type = rtc_rs5c372b; + dev_warn(&client->dev, "assuming rs5c372b\n"); + } + + /* clock may be set for am/pm or 24 hr time */ + switch (rs5c372->type) { + case rtc_rs5c372a: + case rtc_rs5c372b: + /* alarm uses ALARM_A; and nINTRA on 372a, nINTR on 372b. + * so does periodic irq, except some 327a modes. + */ + if (rs5c372->regs[RS5C_REG_CTRL2] & RS5C372_CTRL2_24) + rs5c372->time24 = 1; + break; + case rtc_rv5c386: + case rtc_rv5c387a: + if (rs5c372->regs[RS5C_REG_CTRL1] & RV5C387_CTRL1_24) + rs5c372->time24 = 1; + /* alarm uses ALARM_W; and nINTRB for alarm and periodic + * irq, on both 386 and 387 + */ + break; + default: + dev_err(&client->dev, "unknown RTC type\n"); + goto exit_detach; + } + + /* if the oscillator lost power and no other software (like + * the bootloader) set it up, do it here. + */ + if (rs5c372->regs[RS5C_REG_CTRL2] & RS5C_CTRL2_XSTP) { + unsigned char buf[3]; + + rs5c372->regs[RS5C_REG_CTRL2] &= ~RS5C_CTRL2_XSTP; + + buf[0] = RS5C_ADDR(RS5C_REG_CTRL1); + buf[1] = rs5c372->regs[RS5C_REG_CTRL1]; + buf[2] = rs5c372->regs[RS5C_REG_CTRL2]; + + /* use 24hr mode */ + switch (rs5c372->type) { + case rtc_rs5c372a: + case rtc_rs5c372b: + buf[2] |= RS5C372_CTRL2_24; + rs5c372->time24 = 1; + break; + case rtc_rv5c386: + case rtc_rv5c387a: + buf[1] |= RV5C387_CTRL1_24; + rs5c372->time24 = 1; + break; + default: + /* impossible */ + break; + } + + if ((i2c_master_send(client, buf, 3)) != 3) { + dev_err(&client->dev, "setup error\n"); + goto exit_detach; + } + rs5c372->regs[RS5C_REG_CTRL1] = buf[1]; + rs5c372->regs[RS5C_REG_CTRL2] = buf[2]; + } + + if (rs5c372_get_datetime(client, &tm) < 0) + dev_warn(&client->dev, "clock needs to be set\n"); + + dev_info(&client->dev, "%s found, %s, driver version " DRV_VERSION "\n", + ({ char *s; switch (rs5c372->type) { + case rtc_rs5c372a: s = "rs5c372a"; break; + case rtc_rs5c372b: s = "rs5c372b"; break; + case rtc_rv5c386: s = "rv5c386"; break; + case rtc_rv5c387a: s = "rv5c387a"; break; + default: s = "chip"; break; + }; s;}), + rs5c372->time24 ? "24hr" : "am/pm" + ); + + /* FIXME when client->irq exists, use it to register alarm irq */ rs5c372->rtc = rtc_device_register(rs5c372_driver.driver.name, &client->dev, &rs5c372_rtc_ops, THIS_MODULE); @@ -241,18 +631,12 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind) goto exit_detach; } - err = device_create_file(&client->dev, &dev_attr_trim); + err = rs5c_sysfs_register(&client->dev); if (err) goto exit_devreg; - err = device_create_file(&client->dev, &dev_attr_osc); - if (err) - goto exit_trim; return 0; -exit_trim: - device_remove_file(&client->dev, &dev_attr_trim); - exit_devreg: rtc_device_unregister(rs5c372->rtc); @@ -266,6 +650,11 @@ exit: return err; } +static int rs5c372_attach(struct i2c_adapter *adapter) +{ + return i2c_probe(adapter, &addr_data, rs5c372_probe); +} + static int rs5c372_detach(struct i2c_client *client) { int err; @@ -274,6 +663,8 @@ static int rs5c372_detach(struct i2c_client *client) if (rs5c372->rtc) rtc_device_unregister(rs5c372->rtc); + /* REVISIT properly destroy the sysfs files ... */ + if ((err = i2c_detach_client(client))) return err; @@ -281,6 +672,14 @@ static int rs5c372_detach(struct i2c_client *client) return 0; } +static struct i2c_driver rs5c372_driver = { + .driver = { + .name = "rtc-rs5c372", + }, + .attach_adapter = &rs5c372_attach, + .detach_client = &rs5c372_detach, +}; + static __init int rs5c372_init(void) { return i2c_add_driver(&rs5c372_driver); diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 24ee8be359f..6377db1b446 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -217,6 +217,7 @@ static const struct quirk_printer_struct quirk_printers[] = { { 0x0409, 0xbef4, USBLP_QUIRK_BIDIR }, /* NEC Picty760 (HP OEM) */ { 0x0409, 0xf0be, USBLP_QUIRK_BIDIR }, /* NEC Picty920 (HP OEM) */ { 0x0409, 0xf1be, USBLP_QUIRK_BIDIR }, /* NEC Picty800 (HP OEM) */ + { 0x0482, 0x0010, USBLP_QUIRK_BIDIR }, /* Kyocera Mita FS 820, by zut <kernel@zut.de> */ { 0, 0 } }; diff --git a/drivers/usb/core/endpoint.c b/drivers/usb/core/endpoint.c index c505b767cee..5e628ae3aec 100644 --- a/drivers/usb/core/endpoint.c +++ b/drivers/usb/core/endpoint.c @@ -268,6 +268,7 @@ static void ep_device_release(struct device *dev) struct ep_device *ep_dev = to_ep_device(dev); dev_dbg(dev, "%s called for %s\n", __FUNCTION__, dev->bus_id); + endpoint_free_minor(ep_dev); kfree(ep_dev); } @@ -349,7 +350,6 @@ void usb_remove_ep_files(struct usb_host_endpoint *endpoint) sprintf(name, "ep_%02x", endpoint->desc.bEndpointAddress); sysfs_remove_link(&ep_dev->dev.parent->kobj, name); sysfs_remove_group(&ep_dev->dev.kobj, &ep_dev_attr_grp); - endpoint_free_minor(ep_dev); device_unregister(&ep_dev->dev); endpoint->ep_dev = NULL; destroy_endpoint_class(); diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index 15d77c30793..cdcfd42843d 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -42,6 +42,7 @@ #include <linux/usb_gadget.h> #include <linux/usb/otg.h> #include <linux/dma-mapping.h> +#include <linux/clk.h> #include <asm/byteorder.h> #include <asm/io.h> @@ -60,6 +61,11 @@ /* bulk DMA seems to be behaving for both IN and OUT */ #define USE_DMA +/* FIXME: OMAP2 currently has some problem in DMA mode */ +#ifdef CONFIG_ARCH_OMAP2 +#undef USE_DMA +#endif + /* ISO too */ #define USE_ISO @@ -99,7 +105,7 @@ static unsigned fifo_mode = 0; * boot parameter "omap_udc:fifo_mode=42" */ module_param (fifo_mode, uint, 0); -MODULE_PARM_DESC (fifo_mode, "endpoint setup (0 == default)"); +MODULE_PARM_DESC (fifo_mode, "endpoint configuration"); #ifdef USE_DMA static unsigned use_dma = 1; @@ -122,7 +128,7 @@ static const char driver_desc [] = DRIVER_DESC; /*-------------------------------------------------------------------------*/ /* there's a notion of "current endpoint" for modifying endpoint - * state, and PIO access to its FIFO. + * state, and PIO access to its FIFO. */ static void use_ep(struct omap_ep *ep, u16 select) @@ -391,7 +397,7 @@ done(struct omap_ep *ep, struct omap_req *req, int status) #define FIFO_EMPTY (UDC_NON_ISO_FIFO_EMPTY | UDC_ISO_FIFO_EMPTY) #define FIFO_UNREADABLE (UDC_EP_HALTED | FIFO_EMPTY) -static inline int +static inline int write_packet(u8 *buf, struct omap_req *req, unsigned max) { unsigned len; @@ -456,7 +462,7 @@ static int write_fifo(struct omap_ep *ep, struct omap_req *req) return is_last; } -static inline int +static inline int read_packet(u8 *buf, struct omap_req *req, unsigned avail) { unsigned len; @@ -542,9 +548,9 @@ static inline dma_addr_t dma_csac(unsigned lch) /* omap 3.2/3.3 erratum: sometimes 0 is returned if CSAC/CDAC is * read before the DMA controller finished disabling the channel. */ - csac = omap_readw(OMAP_DMA_CSAC(lch)); + csac = OMAP_DMA_CSAC_REG(lch); if (csac == 0) - csac = omap_readw(OMAP_DMA_CSAC(lch)); + csac = OMAP_DMA_CSAC_REG(lch); return csac; } @@ -555,9 +561,9 @@ static inline dma_addr_t dma_cdac(unsigned lch) /* omap 3.2/3.3 erratum: sometimes 0 is returned if CSAC/CDAC is * read before the DMA controller finished disabling the channel. */ - cdac = omap_readw(OMAP_DMA_CDAC(lch)); + cdac = OMAP_DMA_CDAC_REG(lch); if (cdac == 0) - cdac = omap_readw(OMAP_DMA_CDAC(lch)); + cdac = OMAP_DMA_CDAC_REG(lch); return cdac; } @@ -582,7 +588,7 @@ static u16 dma_src_len(struct omap_ep *ep, dma_addr_t start) } #define DMA_DEST_LAST(x) (cpu_is_omap15xx() \ - ? omap_readw(OMAP_DMA_CSAC(x)) /* really: CPC */ \ + ? OMAP_DMA_CSAC_REG(x) /* really: CPC */ \ : dma_cdac(x)) static u16 dma_dest_len(struct omap_ep *ep, dma_addr_t start) @@ -620,17 +626,19 @@ static void next_in_dma(struct omap_ep *ep, struct omap_req *req) || (cpu_is_omap15xx() && length < ep->maxpacket)) { txdma_ctrl = UDC_TXN_EOT | length; omap_set_dma_transfer_params(ep->lch, OMAP_DMA_DATA_TYPE_S8, - length, 1, sync_mode); + length, 1, sync_mode, 0, 0); } else { length = min(length / ep->maxpacket, (unsigned) UDC_TXN_TSC + 1); - txdma_ctrl = length; + txdma_ctrl = length; omap_set_dma_transfer_params(ep->lch, OMAP_DMA_DATA_TYPE_S16, - ep->ep.maxpacket >> 1, length, sync_mode); + ep->ep.maxpacket >> 1, length, sync_mode, + 0, 0); length *= ep->maxpacket; } omap_set_dma_src_params(ep->lch, OMAP_DMA_PORT_EMIFF, - OMAP_DMA_AMODE_POST_INC, req->req.dma + req->req.actual); + OMAP_DMA_AMODE_POST_INC, req->req.dma + req->req.actual, + 0, 0); omap_start_dma(ep->lch); ep->dma_counter = dma_csac(ep->lch); @@ -675,9 +683,11 @@ static void next_out_dma(struct omap_ep *ep, struct omap_req *req) req->dma_bytes = packets * ep->ep.maxpacket; omap_set_dma_transfer_params(ep->lch, OMAP_DMA_DATA_TYPE_S16, ep->ep.maxpacket >> 1, packets, - OMAP_DMA_SYNC_ELEMENT); + OMAP_DMA_SYNC_ELEMENT, + 0, 0); omap_set_dma_dest_params(ep->lch, OMAP_DMA_PORT_EMIFF, - OMAP_DMA_AMODE_POST_INC, req->req.dma + req->req.actual); + OMAP_DMA_AMODE_POST_INC, req->req.dma + req->req.actual, + 0, 0); ep->dma_counter = DMA_DEST_LAST(ep->lch); UDC_RXDMA_REG(ep->dma_channel) = UDC_RXN_STOP | (packets - 1); @@ -820,7 +830,8 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel) omap_set_dma_dest_params(ep->lch, OMAP_DMA_PORT_TIPB, OMAP_DMA_AMODE_CONSTANT, - (unsigned long) io_v2p((u32)&UDC_DATA_DMA_REG)); + (unsigned long) io_v2p((u32)&UDC_DATA_DMA_REG), + 0, 0); } } else { status = omap_request_dma(OMAP_DMA_USB_W2FC_RX0 - 1 + channel, @@ -831,7 +842,8 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel) omap_set_dma_src_params(ep->lch, OMAP_DMA_PORT_TIPB, OMAP_DMA_AMODE_CONSTANT, - (unsigned long) io_v2p((u32)&UDC_DATA_DMA_REG)); + (unsigned long) io_v2p((u32)&UDC_DATA_DMA_REG), + 0, 0); /* EMIFF */ omap_set_dma_dest_burst_mode(ep->lch, OMAP_DMA_DATA_BURST_4); @@ -846,7 +858,7 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel) /* channel type P: hw synch (fifo) */ if (!cpu_is_omap15xx()) - omap_writew(2, OMAP_DMA_LCH_CTRL(ep->lch)); + OMAP1_DMA_LCH_CTRL_REG(ep->lch) = 2; } just_restart: @@ -893,7 +905,7 @@ static void dma_channel_release(struct omap_ep *ep) else req = NULL; - active = ((1 << 7) & omap_readl(OMAP_DMA_CCR(ep->lch))) != 0; + active = ((1 << 7) & OMAP_DMA_CCR_REG(ep->lch)) != 0; DBG("%s release %s %cxdma%d %p\n", ep->ep.name, active ? "active" : "idle", @@ -1117,7 +1129,7 @@ static int omap_ep_dequeue(struct usb_ep *_ep, struct usb_request *_req) */ dma_channel_release(ep); dma_channel_claim(ep, channel); - } else + } else done(ep, req, -ECONNRESET); spin_unlock_irqrestore(&ep->udc->lock, flags); return 0; @@ -1153,7 +1165,7 @@ static int omap_ep_set_halt(struct usb_ep *_ep, int value) /* IN endpoints must already be idle */ if ((ep->bEndpointAddress & USB_DIR_IN) - && !list_empty(&ep->queue)) { + && !list_empty(&ep->queue)) { status = -EAGAIN; goto done; } @@ -1298,6 +1310,23 @@ static void pullup_disable(struct omap_udc *udc) UDC_SYSCON1_REG &= ~UDC_PULLUP_EN; } +static struct omap_udc *udc; + +static void omap_udc_enable_clock(int enable) +{ + if (udc == NULL || udc->dc_clk == NULL || udc->hhc_clk == NULL) + return; + + if (enable) { + clk_enable(udc->dc_clk); + clk_enable(udc->hhc_clk); + udelay(100); + } else { + clk_disable(udc->hhc_clk); + clk_disable(udc->dc_clk); + } +} + /* * Called by whatever detects VBUS sessions: external transceiver * driver, or maybe GPIO0 VBUS IRQ. May request 48 MHz clock. @@ -1318,10 +1347,22 @@ static int omap_vbus_session(struct usb_gadget *gadget, int is_active) else FUNC_MUX_CTRL_0_REG &= ~VBUS_CTRL_1510; } + if (udc->dc_clk != NULL && is_active) { + if (!udc->clk_requested) { + omap_udc_enable_clock(1); + udc->clk_requested = 1; + } + } if (can_pullup(udc)) pullup_enable(udc); else pullup_disable(udc); + if (udc->dc_clk != NULL && !is_active) { + if (udc->clk_requested) { + omap_udc_enable_clock(0); + udc->clk_requested = 0; + } + } spin_unlock_irqrestore(&udc->lock, flags); return 0; } @@ -1441,7 +1482,7 @@ static void ep0_irq(struct omap_udc *udc, u16 irq_src) } } - /* IN/OUT packets mean we're in the DATA or STATUS stage. + /* IN/OUT packets mean we're in the DATA or STATUS stage. * This driver uses only uses protocol stalls (ep0 never halts), * and if we got this far the gadget driver already had a * chance to stall. Tries to be forgiving of host oddities. @@ -1509,7 +1550,7 @@ static void ep0_irq(struct omap_udc *udc, u16 irq_src) } else if (stat == 0) UDC_CTRL_REG = UDC_SET_FIFO_EN; UDC_EP_NUM_REG = 0; - + /* activate status stage */ if (stat == 1) { done(ep0, req, 0); @@ -1866,7 +1907,7 @@ static void pio_out_timer(unsigned long _ep) spin_lock_irqsave(&ep->udc->lock, flags); if (!list_empty(&ep->queue) && ep->ackwait) { - use_ep(ep, 0); + use_ep(ep, UDC_EP_SEL); stat_flg = UDC_STAT_FLG_REG; if ((stat_flg & UDC_ACK) && (!(stat_flg & UDC_FIFO_EN) @@ -1876,12 +1917,12 @@ static void pio_out_timer(unsigned long _ep) VDBG("%s: lose, %04x\n", ep->ep.name, stat_flg); req = container_of(ep->queue.next, struct omap_req, queue); - UDC_EP_NUM_REG = ep->bEndpointAddress | UDC_EP_SEL; (void) read_fifo(ep, req); UDC_EP_NUM_REG = ep->bEndpointAddress; UDC_CTRL_REG = UDC_SET_FIFO_EN; ep->ackwait = 1 + ep->double_buf; - } + } else + deselect_ep(); } mod_timer(&ep->timer, PIO_OUT_TIMEOUT); spin_unlock_irqrestore(&ep->udc->lock, flags); @@ -2028,7 +2069,17 @@ static irqreturn_t omap_udc_iso_irq(int irq, void *_dev) /*-------------------------------------------------------------------------*/ -static struct omap_udc *udc; +static inline int machine_needs_vbus_session(void) +{ + return (machine_is_omap_innovator() + || machine_is_omap_osk() + || machine_is_omap_apollon() +#ifndef CONFIG_MACH_OMAP_H4_OTG + || machine_is_omap_h4() +#endif + || machine_is_sx1() + ); +} int usb_gadget_register_driver (struct usb_gadget_driver *driver) { @@ -2070,6 +2121,9 @@ int usb_gadget_register_driver (struct usb_gadget_driver *driver) udc->gadget.dev.driver = &driver->driver; spin_unlock_irqrestore(&udc->lock, flags); + if (udc->dc_clk != NULL) + omap_udc_enable_clock(1); + status = driver->bind (&udc->gadget); if (status) { DBG("bind to %s --> %d\n", driver->driver.name, status); @@ -2103,10 +2157,12 @@ int usb_gadget_register_driver (struct usb_gadget_driver *driver) /* boards that don't have VBUS sensing can't autogate 48MHz; * can't enter deep sleep while a gadget driver is active. */ - if (machine_is_omap_innovator() || machine_is_omap_osk()) + if (machine_needs_vbus_session()) omap_vbus_session(&udc->gadget, 1); done: + if (udc->dc_clk != NULL) + omap_udc_enable_clock(0); return status; } EXPORT_SYMBOL(usb_gadget_register_driver); @@ -2121,7 +2177,10 @@ int usb_gadget_unregister_driver (struct usb_gadget_driver *driver) if (!driver || driver != udc->driver || !driver->unbind) return -EINVAL; - if (machine_is_omap_innovator() || machine_is_omap_osk()) + if (udc->dc_clk != NULL) + omap_udc_enable_clock(1); + + if (machine_needs_vbus_session()) omap_vbus_session(&udc->gadget, 0); if (udc->transceiver) @@ -2137,6 +2196,8 @@ int usb_gadget_unregister_driver (struct usb_gadget_driver *driver) udc->gadget.dev.driver = NULL; udc->driver = NULL; + if (udc->dc_clk != NULL) + omap_udc_enable_clock(0); DBG("unregistered driver '%s'\n", driver->driver.name); return status; } @@ -2219,7 +2280,7 @@ static char *trx_mode(unsigned m, int enabled) case 0: return enabled ? "*6wire" : "unused"; case 1: return "4wire"; case 2: return "3wire"; - case 3: return "6wire"; + case 3: return "6wire"; default: return "unknown"; } } @@ -2228,11 +2289,18 @@ static int proc_otg_show(struct seq_file *s) { u32 tmp; u32 trans; + char *ctrl_name; tmp = OTG_REV_REG; - trans = USB_TRANSCEIVER_CTRL_REG; - seq_printf(s, "\nOTG rev %d.%d, transceiver_ctrl %05x\n", - tmp >> 4, tmp & 0xf, trans); + if (cpu_is_omap24xx()) { + ctrl_name = "control_devconf"; + trans = CONTROL_DEVCONF_REG; + } else { + ctrl_name = "tranceiver_ctrl"; + trans = USB_TRANSCEIVER_CTRL_REG; + } + seq_printf(s, "\nOTG rev %d.%d, %s %05x\n", + tmp >> 4, tmp & 0xf, ctrl_name, trans); tmp = OTG_SYSCON_1_REG; seq_printf(s, "otg_syscon1 %08x usb2 %s, usb1 %s, usb0 %s," FOURBITS "\n", tmp, @@ -2307,7 +2375,7 @@ static int proc_udc_show(struct seq_file *s, void *_) driver_desc, use_dma ? " (dma)" : ""); - tmp = UDC_REV_REG & 0xff; + tmp = UDC_REV_REG & 0xff; seq_printf(s, "UDC rev %d.%d, fifo mode %d, gadget %s\n" "hmc %d, transceiver %s\n", @@ -2315,11 +2383,16 @@ static int proc_udc_show(struct seq_file *s, void *_) fifo_mode, udc->driver ? udc->driver->driver.name : "(none)", HMC, - udc->transceiver ? udc->transceiver->label : "(none)"); - seq_printf(s, "ULPD control %04x req %04x status %04x\n", - __REG16(ULPD_CLOCK_CTRL), - __REG16(ULPD_SOFT_REQ), - __REG16(ULPD_STATUS_REQ)); + udc->transceiver + ? udc->transceiver->label + : ((cpu_is_omap1710() || cpu_is_omap24xx()) + ? "external" : "(none)")); + if (cpu_class_is_omap1()) { + seq_printf(s, "ULPD control %04x req %04x status %04x\n", + __REG16(ULPD_CLOCK_CTRL), + __REG16(ULPD_SOFT_REQ), + __REG16(ULPD_STATUS_REQ)); + } /* OTG controller registers */ if (!cpu_is_omap15xx()) @@ -2504,9 +2577,10 @@ omap_ep_setup(char *name, u8 addr, u8 type, dbuf = 1; } else { /* double-buffering "not supported" on 15xx, - * and ignored for PIO-IN on 16xx + * and ignored for PIO-IN on newer chips + * (for more reliable behavior) */ - if (!use_dma || cpu_is_omap15xx()) + if (!use_dma || cpu_is_omap15xx() || cpu_is_omap24xx()) dbuf = 0; switch (maxp) { @@ -2549,7 +2623,7 @@ omap_ep_setup(char *name, u8 addr, u8 type, ep->bEndpointAddress = addr; ep->bmAttributes = type; ep->double_buf = dbuf; - ep->udc = udc; + ep->udc = udc; ep->ep.name = ep->name; ep->ep.ops = &omap_ep_ops; @@ -2709,15 +2783,37 @@ static int __init omap_udc_probe(struct platform_device *pdev) struct otg_transceiver *xceiv = NULL; const char *type = NULL; struct omap_usb_config *config = pdev->dev.platform_data; + struct clk *dc_clk; + struct clk *hhc_clk; /* NOTE: "knows" the order of the resources! */ - if (!request_mem_region(pdev->resource[0].start, + if (!request_mem_region(pdev->resource[0].start, pdev->resource[0].end - pdev->resource[0].start + 1, driver_name)) { DBG("request_mem_region failed\n"); return -EBUSY; } + if (cpu_is_omap16xx()) { + dc_clk = clk_get(&pdev->dev, "usb_dc_ck"); + hhc_clk = clk_get(&pdev->dev, "usb_hhc_ck"); + BUG_ON(IS_ERR(dc_clk) || IS_ERR(hhc_clk)); + /* can't use omap_udc_enable_clock yet */ + clk_enable(dc_clk); + clk_enable(hhc_clk); + udelay(100); + } + + if (cpu_is_omap24xx()) { + dc_clk = clk_get(&pdev->dev, "usb_fck"); + hhc_clk = clk_get(&pdev->dev, "usb_l4_ick"); + BUG_ON(IS_ERR(dc_clk) || IS_ERR(hhc_clk)); + /* can't use omap_udc_enable_clock yet */ + clk_enable(dc_clk); + clk_enable(hhc_clk); + udelay(100); + } + INFO("OMAP UDC rev %d.%d%s\n", UDC_REV_REG >> 4, UDC_REV_REG & 0xf, config->otg ? ", Mini-AB" : ""); @@ -2727,7 +2823,7 @@ static int __init omap_udc_probe(struct platform_device *pdev) hmc = HMC_1510; type = "(unknown)"; - if (machine_is_omap_innovator()) { + if (machine_is_omap_innovator() || machine_is_sx1()) { /* just set up software VBUS detect, and then * later rig it so we always report VBUS. * FIXME without really sensing VBUS, we can't @@ -2756,6 +2852,15 @@ static int __init omap_udc_probe(struct platform_device *pdev) } hmc = HMC_1610; + + if (cpu_is_omap24xx()) { + /* this could be transceiverless in one of the + * "we don't need to know" modes. + */ + type = "external"; + goto known; + } + switch (hmc) { case 0: /* POWERUP DEFAULT == 0 */ case 4: @@ -2794,6 +2899,7 @@ bad_on_1710: goto cleanup0; } } +known: INFO("hmc mode %d, %s transceiver\n", hmc, type); /* a "gadget" abstracts/virtualizes the controller */ @@ -2818,8 +2924,8 @@ bad_on_1710: status = request_irq(pdev->resource[1].start, omap_udc_irq, IRQF_SAMPLE_RANDOM, driver_name, udc); if (status != 0) { - ERR( "can't get irq %ld, err %d\n", - pdev->resource[1].start, status); + ERR("can't get irq %d, err %d\n", + (int) pdev->resource[1].start, status); goto cleanup1; } @@ -2827,24 +2933,41 @@ bad_on_1710: status = request_irq(pdev->resource[2].start, omap_udc_pio_irq, IRQF_SAMPLE_RANDOM, "omap_udc pio", udc); if (status != 0) { - ERR( "can't get irq %ld, err %d\n", - pdev->resource[2].start, status); + ERR("can't get irq %d, err %d\n", + (int) pdev->resource[2].start, status); goto cleanup2; } #ifdef USE_ISO status = request_irq(pdev->resource[3].start, omap_udc_iso_irq, IRQF_DISABLED, "omap_udc iso", udc); if (status != 0) { - ERR("can't get irq %ld, err %d\n", - pdev->resource[3].start, status); + ERR("can't get irq %d, err %d\n", + (int) pdev->resource[3].start, status); goto cleanup3; } #endif + if (cpu_is_omap16xx()) { + udc->dc_clk = dc_clk; + udc->hhc_clk = hhc_clk; + clk_disable(hhc_clk); + clk_disable(dc_clk); + } + + if (cpu_is_omap24xx()) { + udc->dc_clk = dc_clk; + udc->hhc_clk = hhc_clk; + /* FIXME OMAP2 don't release hhc & dc clock */ +#if 0 + clk_disable(hhc_clk); + clk_disable(dc_clk); +#endif + } create_proc_file(); - device_add(&udc->gadget.dev); - return 0; - + status = device_add(&udc->gadget.dev); + if (!status) + return status; + /* If fail, fall through */ #ifdef USE_ISO cleanup3: free_irq(pdev->resource[2].start, udc); @@ -2860,8 +2983,17 @@ cleanup1: cleanup0: if (xceiv) put_device(xceiv->dev); + + if (cpu_is_omap16xx() || cpu_is_omap24xx()) { + clk_disable(hhc_clk); + clk_disable(dc_clk); + clk_put(hhc_clk); + clk_put(dc_clk); + } + release_mem_region(pdev->resource[0].start, pdev->resource[0].end - pdev->resource[0].start + 1); + return status; } @@ -2891,6 +3023,13 @@ static int __exit omap_udc_remove(struct platform_device *pdev) free_irq(pdev->resource[2].start, udc); free_irq(pdev->resource[1].start, udc); + if (udc->dc_clk) { + if (udc->clk_requested) + omap_udc_enable_clock(0); + clk_put(udc->hhc_clk); + clk_put(udc->dc_clk); + } + release_mem_region(pdev->resource[0].start, pdev->resource[0].end - pdev->resource[0].start + 1); diff --git a/drivers/usb/gadget/omap_udc.h b/drivers/usb/gadget/omap_udc.h index 652ee462734..1dc398bb9ab 100644 --- a/drivers/usb/gadget/omap_udc.h +++ b/drivers/usb/gadget/omap_udc.h @@ -175,6 +175,9 @@ struct omap_udc { unsigned ep0_reset_config:1; unsigned ep0_setup:1; struct completion *done; + struct clk *dc_clk; + struct clk *hhc_clk; + unsigned clk_requested:1; }; /*-------------------------------------------------------------------------*/ diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index acd101caeee..e0d4c2358b3 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -209,24 +209,16 @@ static int resume_detect_interrupts_are_broken(struct uhci_hcd *uhci) static int remote_wakeup_is_broken(struct uhci_hcd *uhci) { - static struct dmi_system_id broken_wakeup_table[] = { - { - .ident = "Asus A7V8X", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK"), - DMI_MATCH(DMI_BOARD_NAME, "A7V8X"), - DMI_MATCH(DMI_BOARD_VERSION, "REV 1.xx"), - } - }, - { } - }; int port; + char *sys_info; + static char bad_Asus_board[] = "A7V8X"; /* One of Asus's motherboards has a bug which causes it to * wake up immediately from suspend-to-RAM if any of the ports * are connected. In such cases we will not set EGSM. */ - if (dmi_check_system(broken_wakeup_table)) { + sys_info = dmi_get_system_info(DMI_BOARD_NAME); + if (sys_info && !strcmp(sys_info, bad_Asus_board)) { for (port = 0; port < uhci->rh_numports; ++port) { if (inw(uhci->io_addr + USBPORTSC1 + port * 2) & USBPORTSC_CCS) @@ -265,7 +257,9 @@ __acquires(uhci->lock) int_enable = USBINTR_RESUME; if (remote_wakeup_is_broken(uhci)) egsm_enable = 0; - if (resume_detect_interrupts_are_broken(uhci) || !egsm_enable) + if (resume_detect_interrupts_are_broken(uhci) || !egsm_enable || + !device_may_wakeup( + &uhci_to_hcd(uhci)->self.root_hub->dev)) uhci->working_RD = int_enable = 0; outw(int_enable, uhci->io_addr + USBINTR); diff --git a/drivers/usb/input/Kconfig b/drivers/usb/input/Kconfig index f877cd4f317..258a5d09d3d 100644 --- a/drivers/usb/input/Kconfig +++ b/drivers/usb/input/Kconfig @@ -12,10 +12,8 @@ config USB_HID ---help--- Say Y here if you want full HID support to connect USB keyboards, mice, joysticks, graphic tablets, or any other HID based devices - to your computer via USB. You also need to select HID Input layer - support (below) if you want to use keyboards, mice, joysticks and - the like ... as well as Uninterruptible Power Supply (UPS) and - monitor control devices. + to your computer via USB, as well as Uninterruptible Power Supply + (UPS) and monitor control devices. You can't use this driver and the HIDBP (Boot Protocol) keyboard and mouse drivers at the same time. More information is available: diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c index bf26c3c5699..9148694627d 100644 --- a/drivers/usb/misc/sisusbvga/sisusb_con.c +++ b/drivers/usb/misc/sisusbvga/sisusb_con.c @@ -403,7 +403,7 @@ sisusbcon_putc(struct vc_data *c, int ch, int y, int x) sisusb_copy_memory(sisusb, (char *)SISUSB_VADDR(x, y), - (u32)SISUSB_HADDR(x, y), 2, &written); + (long)SISUSB_HADDR(x, y), 2, &written); mutex_unlock(&sisusb->lock); } @@ -438,7 +438,7 @@ sisusbcon_putcs(struct vc_data *c, const unsigned short *s, } sisusb_copy_memory(sisusb, (char *)SISUSB_VADDR(x, y), - (u32)SISUSB_HADDR(x, y), count * 2, &written); + (long)SISUSB_HADDR(x, y), count * 2, &written); mutex_unlock(&sisusb->lock); } @@ -492,7 +492,7 @@ sisusbcon_clear(struct vc_data *c, int y, int x, int height, int width) sisusb_copy_memory(sisusb, (unsigned char *)SISUSB_VADDR(x, y), - (u32)SISUSB_HADDR(x, y), length, &written); + (long)SISUSB_HADDR(x, y), length, &written); mutex_unlock(&sisusb->lock); } @@ -564,7 +564,7 @@ sisusbcon_bmove(struct vc_data *c, int sy, int sx, sisusb_copy_memory(sisusb, (unsigned char *)SISUSB_VADDR(dx, dy), - (u32)SISUSB_HADDR(dx, dy), length, &written); + (long)SISUSB_HADDR(dx, dy), length, &written); mutex_unlock(&sisusb->lock); } @@ -612,7 +612,7 @@ sisusbcon_switch(struct vc_data *c) length); sisusb_copy_memory(sisusb, (unsigned char *)c->vc_origin, - (u32)SISUSB_HADDR(0, 0), + (long)SISUSB_HADDR(0, 0), length, &written); mutex_unlock(&sisusb->lock); @@ -939,7 +939,7 @@ sisusbcon_scroll_area(struct vc_data *c, struct sisusb_usb_data *sisusb, } sisusb_copy_memory(sisusb, (char *)SISUSB_VADDR(0, t), - (u32)SISUSB_HADDR(0, t), length, &written); + (long)SISUSB_HADDR(0, t), length, &written); mutex_unlock(&sisusb->lock); diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c index 95e682e2c9d..f538013965b 100644 --- a/drivers/usb/net/asix.c +++ b/drivers/usb/net/asix.c @@ -920,7 +920,7 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) goto out2; if ((ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, - 0x0000, 0, 0, buf)) < 0) { + 1, 0, 0, buf)) < 0) { dbg("Select PHY #1 failed: %d", ret); goto out2; } diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index 2f4d303ee36..c8999ae5865 100644 --- a/drivers/usb/serial/Kconfig +++ b/drivers/usb/serial/Kconfig @@ -170,7 +170,7 @@ config USB_SERIAL_FTDI_SIO config USB_SERIAL_FUNSOFT tristate "USB Fundamental Software Dongle Driver" - depends on USB_SERIAL + depends on USB_SERIAL && !(SPARC || SPARC64) ---help--- Say Y here if you want to use the Fundamental Software dongle. diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 819266b7e2f..5ca04e82ea1 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -625,6 +625,9 @@ static int option_send_setup(struct usb_serial_port *port) dbg("%s", __FUNCTION__); + if (port->number != 0) + return 0; + portdata = usb_get_serial_port_data(port); if (port->tty) { diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 5fe7ff441a0..cddef3efba0 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -728,7 +728,7 @@ UNUSUAL_DEV( 0x05ac, 0x1204, 0x0000, 0x9999, "Apple", "iPod", US_SC_DEVICE, US_PR_DEVICE, NULL, - US_FL_FIX_CAPACITY ), + US_FL_FIX_CAPACITY | US_FL_NOT_LOCKABLE ), UNUSUAL_DEV( 0x05ac, 0x1205, 0x0000, 0x9999, "Apple", @@ -1358,6 +1358,21 @@ UNUSUAL_DEV( 0x1370, 0x6828, 0x0110, 0x0110, US_SC_DEVICE, US_PR_DEVICE, NULL, US_FL_IGNORE_RESIDUE ), +/* Reported by Francesco Foresti <frafore@tiscali.it> */ +UNUSUAL_DEV( 0x14cd, 0x6600, 0x0201, 0x0201, + "Super Top", + "IDE DEVICE", + US_SC_DEVICE, US_PR_DEVICE, NULL, + US_FL_IGNORE_RESIDUE ), + +/* Reported by Robert Schedel <r.schedel@yahoo.de> + * Note: this is a 'super top' device like the above 14cd/6600 device */ +UNUSUAL_DEV( 0x1652, 0x6600, 0x0201, 0x0201, + "Teac", + "HD-35PUK-B", + US_SC_DEVICE, US_PR_DEVICE, NULL, + US_FL_IGNORE_RESIDUE ), + /* patch submitted by Davide Perini <perini.davide@dpsoftware.org> * and Renato Perini <rperini@email.it> */ diff --git a/drivers/video/backlight/corgi_bl.c b/drivers/video/backlight/corgi_bl.c index 61587ca2cdb..fde1d951812 100644 --- a/drivers/video/backlight/corgi_bl.c +++ b/drivers/video/backlight/corgi_bl.c @@ -121,7 +121,7 @@ static int corgibl_probe(struct platform_device *pdev) machinfo->limit_mask = -1; corgi_backlight_device = backlight_device_register ("corgi-bl", - NULL, &corgibl_data); + &pdev->dev, NULL, &corgibl_data); if (IS_ERR (corgi_backlight_device)) return PTR_ERR (corgi_backlight_device); diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c index 1c569fb543a..c07d8207fb5 100644 --- a/drivers/video/backlight/hp680_bl.c +++ b/drivers/video/backlight/hp680_bl.c @@ -105,7 +105,7 @@ static struct backlight_properties hp680bl_data = { static int __init hp680bl_probe(struct platform_device *dev) { hp680_backlight_device = backlight_device_register ("hp680-bl", - NULL, &hp680bl_data); + &dev->dev, NULL, &hp680bl_data); if (IS_ERR (hp680_backlight_device)) return PTR_ERR (hp680_backlight_device); diff --git a/drivers/video/backlight/locomolcd.c b/drivers/video/backlight/locomolcd.c index 2d7905410b2..fc812d96c31 100644 --- a/drivers/video/backlight/locomolcd.c +++ b/drivers/video/backlight/locomolcd.c @@ -184,7 +184,7 @@ static int locomolcd_probe(struct locomo_dev *ldev) local_irq_restore(flags); - locomolcd_bl_device = backlight_device_register("locomo-bl", NULL, &locomobl_data); + locomolcd_bl_device = backlight_device_register("locomo-bl", &ldev->dev, NULL, &locomobl_data); if (IS_ERR (locomolcd_bl_device)) return PTR_ERR (locomolcd_bl_device); diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index bbfc8625927..b9b2b27b68c 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -53,7 +53,7 @@ static inline int adfs_readname(char *buf, char *ptr, int maxlen) { char *old_buf = buf; - while (*ptr >= ' ' && maxlen--) { + while ((unsigned char)*ptr >= ' ' && maxlen--) { if (*ptr == '/') *buf++ = '.'; else diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 34e6d7b220c..869f5193ecc 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -14,59 +14,307 @@ #include <linux/time.h> #include <linux/smp_lock.h> #include <linux/namei.h> +#include <linux/poll.h> -static int return_EIO(void) + +static loff_t bad_file_llseek(struct file *file, loff_t offset, int origin) +{ + return -EIO; +} + +static ssize_t bad_file_read(struct file *filp, char __user *buf, + size_t size, loff_t *ppos) +{ + return -EIO; +} + +static ssize_t bad_file_write(struct file *filp, const char __user *buf, + size_t siz, loff_t *ppos) +{ + return -EIO; +} + +static ssize_t bad_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + return -EIO; +} + +static ssize_t bad_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + return -EIO; +} + +static int bad_file_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + return -EIO; +} + +static unsigned int bad_file_poll(struct file *filp, poll_table *wait) +{ + return POLLERR; +} + +static int bad_file_ioctl (struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return -EIO; +} + +static long bad_file_unlocked_ioctl(struct file *file, unsigned cmd, + unsigned long arg) +{ + return -EIO; +} + +static long bad_file_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return -EIO; +} + +static int bad_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + return -EIO; +} + +static int bad_file_open(struct inode *inode, struct file *filp) +{ + return -EIO; +} + +static int bad_file_flush(struct file *file, fl_owner_t id) +{ + return -EIO; +} + +static int bad_file_release(struct inode *inode, struct file *filp) +{ + return -EIO; +} + +static int bad_file_fsync(struct file *file, struct dentry *dentry, + int datasync) +{ + return -EIO; +} + +static int bad_file_aio_fsync(struct kiocb *iocb, int datasync) +{ + return -EIO; +} + +static int bad_file_fasync(int fd, struct file *filp, int on) +{ + return -EIO; +} + +static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl) +{ + return -EIO; +} + +static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos, + size_t count, read_actor_t actor, void *target) +{ + return -EIO; +} + +static ssize_t bad_file_sendpage(struct file *file, struct page *page, + int off, size_t len, loff_t *pos, int more) +{ + return -EIO; +} + +static unsigned long bad_file_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + return -EIO; +} + +static int bad_file_check_flags(int flags) { return -EIO; } -#define EIO_ERROR ((void *) (return_EIO)) +static int bad_file_dir_notify(struct file *file, unsigned long arg) +{ + return -EIO; +} + +static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl) +{ + return -EIO; +} + +static ssize_t bad_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags) +{ + return -EIO; +} + +static ssize_t bad_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + return -EIO; +} static const struct file_operations bad_file_ops = { - .llseek = EIO_ERROR, - .aio_read = EIO_ERROR, - .read = EIO_ERROR, - .write = EIO_ERROR, - .aio_write = EIO_ERROR, - .readdir = EIO_ERROR, - .poll = EIO_ERROR, - .ioctl = EIO_ERROR, - .mmap = EIO_ERROR, - .open = EIO_ERROR, - .flush = EIO_ERROR, - .release = EIO_ERROR, - .fsync = EIO_ERROR, - .aio_fsync = EIO_ERROR, - .fasync = EIO_ERROR, - .lock = EIO_ERROR, - .sendfile = EIO_ERROR, - .sendpage = EIO_ERROR, - .get_unmapped_area = EIO_ERROR, + .llseek = bad_file_llseek, + .read = bad_file_read, + .write = bad_file_write, + .aio_read = bad_file_aio_read, + .aio_write = bad_file_aio_write, + .readdir = bad_file_readdir, + .poll = bad_file_poll, + .ioctl = bad_file_ioctl, + .unlocked_ioctl = bad_file_unlocked_ioctl, + .compat_ioctl = bad_file_compat_ioctl, + .mmap = bad_file_mmap, + .open = bad_file_open, + .flush = bad_file_flush, + .release = bad_file_release, + .fsync = bad_file_fsync, + .aio_fsync = bad_file_aio_fsync, + .fasync = bad_file_fasync, + .lock = bad_file_lock, + .sendfile = bad_file_sendfile, + .sendpage = bad_file_sendpage, + .get_unmapped_area = bad_file_get_unmapped_area, + .check_flags = bad_file_check_flags, + .dir_notify = bad_file_dir_notify, + .flock = bad_file_flock, + .splice_write = bad_file_splice_write, + .splice_read = bad_file_splice_read, }; +static int bad_inode_create (struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + return -EIO; +} + +static struct dentry *bad_inode_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + return ERR_PTR(-EIO); +} + +static int bad_inode_link (struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + return -EIO; +} + +static int bad_inode_unlink(struct inode *dir, struct dentry *dentry) +{ + return -EIO; +} + +static int bad_inode_symlink (struct inode *dir, struct dentry *dentry, + const char *symname) +{ + return -EIO; +} + +static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry, + int mode) +{ + return -EIO; +} + +static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry) +{ + return -EIO; +} + +static int bad_inode_mknod (struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + return -EIO; +} + +static int bad_inode_rename (struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + return -EIO; +} + +static int bad_inode_readlink(struct dentry *dentry, char __user *buffer, + int buflen) +{ + return -EIO; +} + +static int bad_inode_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + return -EIO; +} + +static int bad_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + return -EIO; +} + +static int bad_inode_setattr(struct dentry *direntry, struct iattr *attrs) +{ + return -EIO; +} + +static int bad_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + return -EIO; +} + +static ssize_t bad_inode_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + return -EIO; +} + +static ssize_t bad_inode_listxattr(struct dentry *dentry, char *buffer, + size_t buffer_size) +{ + return -EIO; +} + +static int bad_inode_removexattr(struct dentry *dentry, const char *name) +{ + return -EIO; +} + static struct inode_operations bad_inode_ops = { - .create = EIO_ERROR, - .lookup = EIO_ERROR, - .link = EIO_ERROR, - .unlink = EIO_ERROR, - .symlink = EIO_ERROR, - .mkdir = EIO_ERROR, - .rmdir = EIO_ERROR, - .mknod = EIO_ERROR, - .rename = EIO_ERROR, - .readlink = EIO_ERROR, + .create = bad_inode_create, + .lookup = bad_inode_lookup, + .link = bad_inode_link, + .unlink = bad_inode_unlink, + .symlink = bad_inode_symlink, + .mkdir = bad_inode_mkdir, + .rmdir = bad_inode_rmdir, + .mknod = bad_inode_mknod, + .rename = bad_inode_rename, + .readlink = bad_inode_readlink, /* follow_link must be no-op, otherwise unmounting this inode won't work */ - .truncate = EIO_ERROR, - .permission = EIO_ERROR, - .getattr = EIO_ERROR, - .setattr = EIO_ERROR, - .setxattr = EIO_ERROR, - .getxattr = EIO_ERROR, - .listxattr = EIO_ERROR, - .removexattr = EIO_ERROR, + /* put_link returns void */ + /* truncate returns void */ + .permission = bad_inode_permission, + .getattr = bad_inode_getattr, + .setattr = bad_inode_setattr, + .setxattr = bad_inode_setxattr, + .getxattr = bad_inode_getxattr, + .listxattr = bad_inode_listxattr, + .removexattr = bad_inode_removexattr, + /* truncate_range returns void */ }; @@ -88,7 +336,7 @@ static struct inode_operations bad_inode_ops = * on it to fail from this point on. */ -void make_bad_inode(struct inode * inode) +void make_bad_inode(struct inode *inode) { remove_inode_hash(inode); @@ -113,7 +361,7 @@ EXPORT_SYMBOL(make_bad_inode); * Returns true if the inode in question has been marked as bad. */ -int is_bad_inode(struct inode * inode) +int is_bad_inode(struct inode *inode) { return (inode->i_op == &bad_inode_ops); } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d3adfd353ff..7cb28720f90 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -854,13 +854,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * default mmap base, as well as whatever program they * might try to exec. This is because the brk will * follow the loader, and is not movable. */ - if (current->flags & PF_RANDOMIZE) - load_bias = randomize_range(0x10000, - ELF_ET_DYN_BASE, - 0); - else - load_bias = ELF_ET_DYN_BASE; - load_bias = ELF_PAGESTART(load_bias - vaddr); + load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); } error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index b8238147577..2e0021e8f36 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -275,6 +275,25 @@ static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk, UFSD("EXIT\n"); } +static void ufs_clear_frags(struct inode *inode, sector_t beg, unsigned int n, + int sync) +{ + struct buffer_head *bh; + sector_t end = beg + n; + + for (; beg < end; ++beg) { + bh = sb_getblk(inode->i_sb, beg); + lock_buffer(bh); + memset(bh->b_data, 0, inode->i_sb->s_blocksize); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + if (IS_SYNC(inode) || sync) + sync_dirty_buffer(bh); + brelse(bh); + } +} + unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment, unsigned goal, unsigned count, int * err, struct page *locked_page) { @@ -350,6 +369,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment, *p = cpu_to_fs32(sb, result); *err = 0; UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); + ufs_clear_frags(inode, result + oldcount, newcount - oldcount, + locked_page != NULL); } unlock_super(sb); UFSD("EXIT, result %u\n", result); @@ -363,6 +384,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment, if (result) { *err = 0; UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); + ufs_clear_frags(inode, result + oldcount, newcount - oldcount, + locked_page != NULL); unlock_super(sb); UFSD("EXIT, result %u\n", result); return result; @@ -398,6 +421,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment, *p = cpu_to_fs32(sb, result); *err = 0; UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); + ufs_clear_frags(inode, result + oldcount, newcount - oldcount, + locked_page != NULL); unlock_super(sb); if (newcount < request) ufs_free_fragments (inode, result + newcount, request - newcount); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index ee1eaa6f4ec..2fbab0aab68 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -156,36 +156,6 @@ out: return ret; } -static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh) -{ - lock_buffer(bh); - memset(bh->b_data, 0, inode->i_sb->s_blocksize); - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - unlock_buffer(bh); - if (IS_SYNC(inode)) - sync_dirty_buffer(bh); -} - -static struct buffer_head * -ufs_clear_frags(struct inode *inode, sector_t beg, - unsigned int n, sector_t want) -{ - struct buffer_head *res = NULL, *bh; - sector_t end = beg + n; - - for (; beg < end; ++beg) { - bh = sb_getblk(inode->i_sb, beg); - ufs_clear_frag(inode, bh); - if (want != beg) - brelse(bh); - else - res = bh; - } - BUG_ON(!res); - return res; -} - /** * ufs_inode_getfrag() - allocate new fragment(s) * @inode - pointer to inode @@ -302,7 +272,7 @@ repeat: } if (!phys) { - result = ufs_clear_frags(inode, tmp, required, tmp + blockoff); + result = sb_getblk(sb, tmp + blockoff); } else { *phys = tmp + blockoff; result = NULL; @@ -403,8 +373,7 @@ repeat: if (!phys) { - result = ufs_clear_frags(inode, tmp, uspi->s_fpb, - tmp + blockoff); + result = sb_getblk(sb, tmp + blockoff); } else { *phys = tmp + blockoff; *new = 1; @@ -471,13 +440,13 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head #define GET_INODE_DATABLOCK(x) \ ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new, bh_result->b_page) #define GET_INODE_PTR(x) \ - ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page) + ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, NULL) #define GET_INDIRECT_DATABLOCK(x) \ ufs_inode_getblock(inode, bh, x, fragment, \ - &err, &phys, &new, bh_result->b_page); + &err, &phys, &new, bh_result->b_page) #define GET_INDIRECT_PTR(x) \ ufs_inode_getblock(inode, bh, x, fragment, \ - &err, NULL, NULL, bh_result->b_page); + &err, NULL, NULL, NULL) if (ptr < UFS_NDIR_FRAGMENT) { bh = GET_INODE_DATABLOCK(ptr); diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h index 9e6c23c360b..ebc1f697615 100644 --- a/include/acpi/acconfig.h +++ b/include/acpi/acconfig.h @@ -105,7 +105,7 @@ /* Maximum object reference count (detects object deletion issues) */ -#define ACPI_MAX_REFERENCE_COUNT 0x800 +#define ACPI_MAX_REFERENCE_COUNT 0x1000 /* Size of cached memory mapping for system memory operation region */ diff --git a/include/asm-arm/arch-ep93xx/irqs.h b/include/asm-arm/arch-ep93xx/irqs.h index 9a42f5de9e5..ae532e304bf 100644 --- a/include/asm-arm/arch-ep93xx/irqs.h +++ b/include/asm-arm/arch-ep93xx/irqs.h @@ -22,9 +22,9 @@ #define IRQ_EP93XX_DMAM2P9 16 #define IRQ_EP93XX_DMAM2M0 17 #define IRQ_EP93XX_DMAM2M1 18 -#define IRQ_EP93XX_GPIO0MUX 20 -#define IRQ_EP93XX_GPIO1MUX 21 -#define IRQ_EP93XX_GPIO2MUX 22 +#define IRQ_EP93XX_GPIO0MUX 19 +#define IRQ_EP93XX_GPIO1MUX 20 +#define IRQ_EP93XX_GPIO2MUX 21 #define IRQ_EP93XX_GPIO3MUX 22 #define IRQ_EP93XX_UART1RX 23 #define IRQ_EP93XX_UART1TX 24 diff --git a/include/asm-arm/arch-iop13xx/io.h b/include/asm-arm/arch-iop13xx/io.h index db6de2480a2..5a7bdb52660 100644 --- a/include/asm-arm/arch-iop13xx/io.h +++ b/include/asm-arm/arch-iop13xx/io.h @@ -21,10 +21,11 @@ #define IO_SPACE_LIMIT 0xffffffff -#define __io(a) (a) +#define __io(a) __iop13xx_io(a) #define __mem_pci(a) (a) #define __mem_isa(a) (a) +extern void __iomem * __iop13xx_io(unsigned long io_addr); extern void __iomem * __ioremap(unsigned long, size_t, unsigned long); extern void __iomem *__iop13xx_ioremap(unsigned long cookie, size_t size, unsigned long flags); diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index 083e03c5639..e24f6b6c79a 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -1626,7 +1626,7 @@ #define SSCR0_RIM (1 << 22) /* Receive FIFO overrrun interrupt mask */ #define SSCR0_TUM (1 << 23) /* Transmit FIFO underrun interrupt mask */ #define SSCR0_FRDC (0x07000000) /* Frame rate divider control (mask) */ -#define SSCR0_SlotsPerFrm(x) ((x) - 1) /* Time slots per frame [1..8] */ +#define SSCR0_SlotsPerFrm(x) (((x) - 1) << 24) /* Time slots per frame [1..8] */ #define SSCR0_ADC (1 << 30) /* Audio clock select */ #define SSCR0_MOD (1 << 31) /* Mode (normal or network) */ #endif @@ -1655,6 +1655,7 @@ #define SSCR0_EDSS (1 << 20) /* Extended Data Size Select */ /* extra bits in PXA255, PXA26x and PXA27x SSP ports */ +#define SSCR0_TISSP (1 << 4) /* TI Sync Serial Protocol */ #define SSCR0_PSP (3 << 4) /* PSP - Programmable Serial Protocol */ #define SSCR1_TTELP (1 << 31) /* TXD Tristate Enable Last Phase */ #define SSCR1_TTE (1 << 30) /* TXD Tristate Enable */ diff --git a/include/asm-arm/arch-s3c2410/dma.h b/include/asm-arm/arch-s3c2410/dma.h index 7ac22483697..58ffa7ba3c8 100644 --- a/include/asm-arm/arch-s3c2410/dma.h +++ b/include/asm-arm/arch-s3c2410/dma.h @@ -14,7 +14,7 @@ #define __ASM_ARCH_DMA_H __FILE__ #include <linux/sysdev.h> -#include "hardware.h" +#include <asm/hardware.h> /* * This is the maximum DMA address(physical address) that can be DMAd to. diff --git a/include/asm-arm/arch-s3c2410/entry-macro.S b/include/asm-arm/arch-s3c2410/entry-macro.S index e09a6b8ec15..1eb4e6b8d24 100644 --- a/include/asm-arm/arch-s3c2410/entry-macro.S +++ b/include/asm-arm/arch-s3c2410/entry-macro.S @@ -20,7 +20,7 @@ #define INTOFFSET (0x14) #include <asm/hardware.h> -#include <asm/arch/irqs.h> +#include <asm/irq.h> .macro get_irqnr_and_base, irqnr, irqstat, base, tmp diff --git a/include/asm-arm/arch-s3c2410/hardware.h b/include/asm-arm/arch-s3c2410/hardware.h index 729565e5cdf..6dadf58ff98 100644 --- a/include/asm-arm/arch-s3c2410/hardware.h +++ b/include/asm-arm/arch-s3c2410/hardware.h @@ -13,6 +13,10 @@ #ifndef __ASM_ARCH_HARDWARE_H #define __ASM_ARCH_HARDWARE_H +#ifndef __ASM_HARDWARE_H +#error "Do not include this directly, instead #include <asm/hardware.h>" +#endif + #ifndef __ASSEMBLY__ /* external functions for GPIO support diff --git a/include/asm-arm/arch-s3c2410/irqs.h b/include/asm-arm/arch-s3c2410/irqs.h index 39a69829d16..4b7cff456c4 100644 --- a/include/asm-arm/arch-s3c2410/irqs.h +++ b/include/asm-arm/arch-s3c2410/irqs.h @@ -12,6 +12,9 @@ #ifndef __ASM_ARCH_IRQS_H #define __ASM_ARCH_IRQS_H __FILE__ +#ifndef __ASM_ARM_IRQ_H +#error "Do not include this directly, instead #include <asm/irq.h>" +#endif /* we keep the first set of CPU IRQs out of the range of * the ISA space, so that the PC104 has them to itself diff --git a/include/asm-arm/cacheflush.h b/include/asm-arm/cacheflush.h index 378a3a2ce8d..d51049522cd 100644 --- a/include/asm-arm/cacheflush.h +++ b/include/asm-arm/cacheflush.h @@ -355,6 +355,8 @@ extern void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, */ extern void flush_dcache_page(struct page *); +extern void __flush_dcache_page(struct address_space *mapping, struct page *page); + #define flush_dcache_mmap_lock(mapping) \ write_lock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_unlock(mapping) \ diff --git a/include/asm-arm/flat.h b/include/asm-arm/flat.h index 96694647858..16f5375e57b 100644 --- a/include/asm-arm/flat.h +++ b/include/asm-arm/flat.h @@ -5,7 +5,9 @@ #ifndef __ARM_FLAT_H__ #define __ARM_FLAT_H__ -#define flat_stack_align(sp) /* nothing needed */ +/* An odd number of words will be pushed after this alignment, so + deliberately misalign the value. */ +#define flat_stack_align(sp) sp = (void *)(((unsigned long)(sp) - 4) | 4) #define flat_argvp_envp_on_stack() 1 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) diff --git a/include/asm-arm/irq.h b/include/asm-arm/irq.h index 283af50a16c..1b882a255e3 100644 --- a/include/asm-arm/irq.h +++ b/include/asm-arm/irq.h @@ -19,7 +19,6 @@ #define NO_IRQ ((unsigned int)(-1)) #endif -struct irqaction; /* * Migration helpers @@ -37,6 +36,10 @@ struct irqaction; #define IRQT_HIGH (__IRQT_HIGHLVL) #define IRQT_PROBE IRQ_TYPE_PROBE +#ifndef __ASSEMBLY__ +struct irqaction; extern void migrate_irqs(void); #endif +#endif + diff --git a/include/asm-i386/boot.h b/include/asm-i386/boot.h index 8ce79a6fa89..e7686d0a841 100644 --- a/include/asm-i386/boot.h +++ b/include/asm-i386/boot.h @@ -13,7 +13,8 @@ #define ASK_VGA 0xfffd /* ask for it at bootup */ /* Physical address where kenrel should be loaded. */ -#define LOAD_PHYSICAL_ADDR ((0x100000 + CONFIG_PHYSICAL_ALIGN - 1) \ +#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + + (CONFIG_PHYSICAL_ALIGN - 1)) \ & ~(CONFIG_PHYSICAL_ALIGN - 1)) #endif /* _LINUX_BOOT_H */ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 28fdce1ac1d..bc8b4616bad 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include <asm/types.h> #include <linux/ioctl.h> -#define KVM_API_VERSION 1 +#define KVM_API_VERSION 2 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -45,6 +45,7 @@ enum kvm_exit_reason { KVM_EXIT_DEBUG = 4, KVM_EXIT_HLT = 5, KVM_EXIT_MMIO = 6, + KVM_EXIT_IRQ_WINDOW_OPEN = 7, }; /* for KVM_RUN */ @@ -53,11 +54,19 @@ struct kvm_run { __u32 vcpu; __u32 emulated; /* skip current instruction */ __u32 mmio_completed; /* mmio request completed */ + __u8 request_interrupt_window; + __u8 padding1[3]; /* out */ __u32 exit_type; __u32 exit_reason; __u32 instruction_length; + __u8 ready_for_interrupt_injection; + __u8 if_flag; + __u16 padding2; + __u64 cr8; + __u64 apic_base; + union { /* KVM_EXIT_UNKNOWN */ struct { diff --git a/include/linux/magic.h b/include/linux/magic.h index 156c40fc664..b78bbf42135 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -3,6 +3,7 @@ #define ADFS_SUPER_MAGIC 0xadf5 #define AFFS_SUPER_MAGIC 0xadff +#define AFS_SUPER_MAGIC 0x5346414F #define AUTOFS_SUPER_MAGIC 0x0187 #define CODA_SUPER_MAGIC 0x73757245 #define EFS_SUPER_MAGIC 0x414A53 diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 778e701eff3..f7a416c52ed 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1277,6 +1277,7 @@ #define PCI_DEVICE_ID_VIA_3296_0 0x0296 #define PCI_DEVICE_ID_VIA_8363_0 0x0305 #define PCI_DEVICE_ID_VIA_P4M800CE 0x0314 +#define PCI_DEVICE_ID_VIA_K8M890CE 0x0336 #define PCI_DEVICE_ID_VIA_8371_0 0x0391 #define PCI_DEVICE_ID_VIA_8501_0 0x0501 #define PCI_DEVICE_ID_VIA_82C561 0x0561 diff --git a/include/linux/swap.h b/include/linux/swap.h index add51cebc8d..5423559a44a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -245,7 +245,7 @@ extern int swap_duplicate(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern void swap_free(swp_entry_t); extern void free_swap_and_cache(swp_entry_t); -extern int swap_type_of(dev_t, sector_t); +extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); extern sector_t swapdev_block(int, pgoff_t); diff --git a/include/net/tcp.h b/include/net/tcp.h index b7d8317f22a..cd8fa0c858a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -242,7 +242,7 @@ extern int tcp_memory_pressure; static inline int before(__u32 seq1, __u32 seq2) { - return (__s32)(seq2-seq1) > 0; + return (__s32)(seq1-seq2) < 0; } #define after(seq2, seq1) before(seq1, seq2) diff --git a/include/net/x25.h b/include/net/x25.h index 0ad90ebcf86..e47fe440d9d 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -259,6 +259,7 @@ extern int x25_decode(struct sock *, struct sk_buff *, int *, int *, int *, int extern void x25_disconnect(struct sock *, int, unsigned char, unsigned char); /* x25_timer.c */ +extern void x25_init_timers(struct sock *sk); extern void x25_start_heartbeat(struct sock *); extern void x25_start_t2timer(struct sock *); extern void x25_start_t21timer(struct sock *); diff --git a/init/main.c b/init/main.c index 2b1cdaab45e..bc27d72bbb1 100644 --- a/init/main.c +++ b/init/main.c @@ -538,6 +538,11 @@ asmlinkage void __init start_kernel(void) parse_args("Booting kernel", command_line, __start___param, __stop___param - __start___param, &unknown_bootoption); + if (!irqs_disabled()) { + printk(KERN_WARNING "start_kernel(): bug: interrupts were " + "enabled *very* early, fixing it\n"); + local_irq_disable(); + } sort_main_extable(); trap_init(); rcu_init(); diff --git a/kernel/module.c b/kernel/module.c index dbce132b354..d0f2260a021 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1148,10 +1148,10 @@ static int mod_sysfs_setup(struct module *mod, kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); return 0; -out_unreg_drivers: - kobject_unregister(mod->drivers_dir); out_unreg_param: module_param_sysfs_remove(mod); +out_unreg_drivers: + kobject_unregister(mod->drivers_dir); out_unreg: kobject_del(&mod->mkobj.kobj); kobject_put(&mod->mkobj.kobj); @@ -2327,8 +2327,22 @@ void print_modules(void) printk("\n"); } +static char *make_driver_name(struct device_driver *drv) +{ + char *driver_name; + + driver_name = kmalloc(strlen(drv->name) + strlen(drv->bus->name) + 2, + GFP_KERNEL); + if (!driver_name) + return NULL; + + sprintf(driver_name, "%s:%s", drv->bus->name, drv->name); + return driver_name; +} + void module_add_driver(struct module *mod, struct device_driver *drv) { + char *driver_name; int no_warn; if (!mod || !drv) @@ -2336,17 +2350,31 @@ void module_add_driver(struct module *mod, struct device_driver *drv) /* Don't check return codes; these calls are idempotent */ no_warn = sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module"); - no_warn = sysfs_create_link(mod->drivers_dir, &drv->kobj, drv->name); + driver_name = make_driver_name(drv); + if (driver_name) { + no_warn = sysfs_create_link(mod->drivers_dir, &drv->kobj, + driver_name); + kfree(driver_name); + } } EXPORT_SYMBOL(module_add_driver); void module_remove_driver(struct device_driver *drv) { + char *driver_name; + if (!drv) return; + sysfs_remove_link(&drv->kobj, "module"); - if (drv->owner && drv->owner->drivers_dir) - sysfs_remove_link(drv->owner->drivers_dir, drv->name); + if (drv->owner && drv->owner->drivers_dir) { + driver_name = make_driver_name(drv); + if (driver_name) { + sysfs_remove_link(drv->owner->drivers_dir, + driver_name); + kfree(driver_name); + } + } } EXPORT_SYMBOL(module_remove_driver); diff --git a/kernel/params.c b/kernel/params.c index f406655d665..718945da8f5 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -143,9 +143,15 @@ int parse_args(const char *name, while (*args) { int ret; + int irq_was_disabled; args = next_arg(args, ¶m, &val); + irq_was_disabled = irqs_disabled(); ret = parse_one(param, val, params, num, unknown); + if (irq_was_disabled && !irqs_disabled()) { + printk(KERN_WARNING "parse_args(): option '%s' enabled " + "irq's!\n", param); + } switch (ret) { case -ENOENT: printk(KERN_ERR "%s: Unknown parameter `%s'\n", diff --git a/kernel/power/swap.c b/kernel/power/swap.c index f133d4a6d81..3581f8f86ac 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -165,14 +165,15 @@ static int swsusp_swap_check(void) /* This is called before saving image */ { int res; - res = swap_type_of(swsusp_resume_device, swsusp_resume_block); + res = swap_type_of(swsusp_resume_device, swsusp_resume_block, + &resume_bdev); if (res < 0) return res; root_swap = res; - resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_WRITE); - if (IS_ERR(resume_bdev)) - return PTR_ERR(resume_bdev); + res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR); + if (res) + return res; res = set_blocksize(resume_bdev, PAGE_SIZE); if (res < 0) diff --git a/kernel/power/user.c b/kernel/power/user.c index 89443b85163..f7b7a785a5c 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -57,7 +57,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) memset(&data->handle, 0, sizeof(struct snapshot_handle)); if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { data->swap = swsusp_resume_device ? - swap_type_of(swsusp_resume_device, 0) : -1; + swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; } else { data->swap = -1; @@ -268,7 +268,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, * so we need to recode them */ if (old_decode_dev(arg)) { - data->swap = swap_type_of(old_decode_dev(arg), 0); + data->swap = swap_type_of(old_decode_dev(arg), + 0, NULL); if (data->swap < 0) error = -ENODEV; } else { @@ -365,7 +366,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, swdev = old_decode_dev(swap_area.dev); if (swdev) { offset = swap_area.offset; - data->swap = swap_type_of(swdev, offset); + data->swap = swap_type_of(swdev, offset, NULL); if (data->swap < 0) error = -ENODEV; } else { diff --git a/kernel/profile.c b/kernel/profile.c index fb5e03d57e9..11550b2290b 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -63,7 +63,7 @@ static int __init profile_setup(char * str) printk(KERN_INFO "kernel sleep profiling enabled (shift: %ld)\n", prof_shift); - } else if (!strncmp(str, sleepstr, strlen(sleepstr))) { + } else if (!strncmp(str, schedstr, strlen(schedstr))) { prof_on = SCHED_PROFILING; if (str[strlen(schedstr)] == ',') str += strlen(schedstr) + 1; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 6969cfb3390..b278b8d60ee 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -61,12 +61,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) } /* - * swapoff can easily use up all memory, so kill those first. - */ - if (p->flags & PF_SWAPOFF) - return ULONG_MAX; - - /* * The memory size of the process is the basis for the badness. */ points = mm->total_vm; @@ -77,6 +71,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) task_unlock(p); /* + * swapoff can easily use up all memory, so kill those first. + */ + if (p->flags & PF_SWAPOFF) + return ULONG_MAX; + + /* * Processes which fork a lot of child processes are likely * a good choice. We add half the vmsize of the children if they * have an own mm. This prevents forking servers to flood the diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8c1a116875b..a49f96b7ea4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -711,6 +711,9 @@ static void __drain_pages(unsigned int cpu) for_each_zone(zone) { struct per_cpu_pageset *pset; + if (!populated_zone(zone)) + continue; + pset = zone_pcp(zone, cpu); for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { struct per_cpu_pages *pcp; @@ -3321,6 +3324,10 @@ void *__init alloc_large_system_hash(const char *tablename, numentries >>= (scale - PAGE_SHIFT); else numentries <<= (PAGE_SHIFT - scale); + + /* Make sure we've got at least a 0-order allocation.. */ + if (unlikely((numentries * bucketsize) < PAGE_SIZE)) + numentries = PAGE_SIZE / bucketsize; } numentries = roundup_pow_of_two(numentries); diff --git a/mm/slab.c b/mm/slab.c index 0d4e57431de..c6100628a6e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3281,7 +3281,7 @@ retry: flags | GFP_THISNODE, nid); } - if (!obj) { + if (!obj && !(flags & __GFP_NO_GROW)) { /* * This allocation will be performed within the constraints * of the current cpuset / memory policy requirements. @@ -3310,7 +3310,7 @@ retry: */ goto retry; } else { - kmem_freepages(cache, obj); + /* cache_grow already freed obj */ obj = NULL; } } diff --git a/mm/swapfile.c b/mm/swapfile.c index b9fc0e5de6d..a2d9bb4e80d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -434,7 +434,7 @@ void free_swap_and_cache(swp_entry_t entry) * * This is needed for the suspend to disk (aka swsusp). */ -int swap_type_of(dev_t device, sector_t offset) +int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) { struct block_device *bdev = NULL; int i; @@ -450,6 +450,9 @@ int swap_type_of(dev_t device, sector_t offset) continue; if (!bdev) { + if (bdev_p) + *bdev_p = sis->bdev; + spin_unlock(&swap_lock); return i; } @@ -459,6 +462,9 @@ int swap_type_of(dev_t device, sector_t offset) se = list_entry(sis->extent_list.next, struct swap_extent, list); if (se->start_block == offset) { + if (bdev_p) + *bdev_p = sis->bdev; + spin_unlock(&swap_lock); bdput(bdev); return i; diff --git a/mm/vmscan.c b/mm/vmscan.c index 40fea491839..7430df68cb6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1406,6 +1406,16 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, return ret; } +static unsigned long count_lru_pages(void) +{ + struct zone *zone; + unsigned long ret = 0; + + for_each_zone(zone) + ret += zone->nr_active + zone->nr_inactive; + return ret; +} + /* * Try to free `nr_pages' of memory, system-wide, and return the number of * freed pages. @@ -1420,7 +1430,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages) unsigned long ret = 0; int pass; struct reclaim_state reclaim_state; - struct zone *zone; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .may_swap = 0, @@ -1431,10 +1440,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) current->reclaim_state = &reclaim_state; - lru_pages = 0; - for_each_zone(zone) - lru_pages += zone->nr_active + zone->nr_inactive; - + lru_pages = count_lru_pages(); nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); /* If slab caches are huge, it's better to hit them first */ while (nr_slab >= lru_pages) { @@ -1461,13 +1467,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages) for (pass = 0; pass < 5; pass++) { int prio; - /* Needed for shrinking slab caches later on */ - if (!lru_pages) - for_each_zone(zone) { - lru_pages += zone->nr_active; - lru_pages += zone->nr_inactive; - } - /* Force reclaiming mapped pages in the passes #3 and #4 */ if (pass > 2) { sc.may_swap = 1; @@ -1483,7 +1482,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages) goto out; reclaim_state.reclaimed_slab = 0; - shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages); + shrink_slab(sc.nr_scanned, sc.gfp_mask, + count_lru_pages()); ret += reclaim_state.reclaimed_slab; if (ret >= nr_pages) goto out; @@ -1491,20 +1491,19 @@ unsigned long shrink_all_memory(unsigned long nr_pages) if (sc.nr_scanned && prio < DEF_PRIORITY - 2) congestion_wait(WRITE, HZ / 10); } - - lru_pages = 0; } /* * If ret = 0, we could not shrink LRUs, but there may be something * in slab caches */ - if (!ret) + if (!ret) { do { reclaim_state.reclaimed_slab = 0; - shrink_slab(nr_pages, sc.gfp_mask, lru_pages); + shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages()); ret += reclaim_state.reclaimed_slab; } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0); + } out: current->reclaim_state = NULL; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index bee558a4180..6c84ccb8c9d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -610,7 +610,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, struct ebt_entry_target *t; struct ebt_target *target; unsigned int i, j, hook = 0, hookmask = 0; - size_t gap = e->next_offset - e->target_offset; + size_t gap; int ret; /* don't mess with the struct ebt_entries */ @@ -660,6 +660,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, if (ret != 0) goto cleanup_watchers; t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); + gap = e->next_offset - e->target_offset; target = find_target_lock(t->u.name, &ret, &ebt_mutex); if (!target) goto cleanup_watchers; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1897a3a385d..04d4b93c68e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -148,6 +148,7 @@ #include <linux/seq_file.h> #include <linux/wait.h> #include <linux/etherdevice.h> +#include <linux/kthread.h> #include <net/checksum.h> #include <net/ipv6.h> #include <net/addrconf.h> @@ -360,8 +361,7 @@ struct pktgen_thread { spinlock_t if_lock; struct list_head if_list; /* All device here */ struct list_head th_list; - int removed; - char name[32]; + struct task_struct *tsk; char result[512]; u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ @@ -1689,7 +1689,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); seq_printf(seq, "Name: %s max_before_softirq: %d\n", - t->name, t->max_before_softirq); + t->tsk->comm, t->max_before_softirq); seq_printf(seq, "Running: "); @@ -3112,7 +3112,7 @@ static void pktgen_rem_thread(struct pktgen_thread *t) { /* Remove from the thread list */ - remove_proc_entry(t->name, pg_proc_dir); + remove_proc_entry(t->tsk->comm, pg_proc_dir); mutex_lock(&pktgen_thread_lock); @@ -3260,58 +3260,40 @@ out:; * Main loop of the thread goes here */ -static void pktgen_thread_worker(struct pktgen_thread *t) +static int pktgen_thread_worker(void *arg) { DEFINE_WAIT(wait); + struct pktgen_thread *t = arg; struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - sigset_t tmpsig; u32 max_before_softirq; u32 tx_since_softirq = 0; - daemonize("pktgen/%d", cpu); - - /* Block all signals except SIGKILL, SIGSTOP and SIGTERM */ - - spin_lock_irq(¤t->sighand->siglock); - tmpsig = current->blocked; - siginitsetinv(¤t->blocked, - sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGTERM)); - - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - /* Migrate to the right CPU */ - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) - BUG(); + BUG_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); - t->control &= ~(T_TERMINATE); - t->control &= ~(T_RUN); - t->control &= ~(T_STOP); - t->control &= ~(T_REMDEVALL); - t->control &= ~(T_REMDEV); - t->pid = current->pid; PG_DEBUG(printk("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid)); max_before_softirq = t->max_before_softirq; - __set_current_state(TASK_INTERRUPTIBLE); - mb(); + set_current_state(TASK_INTERRUPTIBLE); - while (1) { - - __set_current_state(TASK_RUNNING); + while (!kthread_should_stop()) { + pkt_dev = next_to_run(t); - /* - * Get next dev to xmit -- if any. - */ + if (!pkt_dev && + (t->control & (T_STOP | T_RUN | T_REMDEVALL | T_REMDEV)) + == 0) { + prepare_to_wait(&(t->queue), &wait, + TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 10); + finish_wait(&(t->queue), &wait); + } - pkt_dev = next_to_run(t); + __set_current_state(TASK_RUNNING); if (pkt_dev) { @@ -3329,21 +3311,8 @@ static void pktgen_thread_worker(struct pktgen_thread *t) do_softirq(); tx_since_softirq = 0; } - } else { - prepare_to_wait(&(t->queue), &wait, TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); - finish_wait(&(t->queue), &wait); } - /* - * Back from sleep, either due to the timeout or signal. - * We check if we have any "posted" work for us. - */ - - if (t->control & T_TERMINATE || signal_pending(current)) - /* we received a request to terminate ourself */ - break; - if (t->control & T_STOP) { pktgen_stop(t); t->control &= ~(T_STOP); @@ -3364,20 +3333,19 @@ static void pktgen_thread_worker(struct pktgen_thread *t) t->control &= ~(T_REMDEV); } - if (need_resched()) - schedule(); + set_current_state(TASK_INTERRUPTIBLE); } - PG_DEBUG(printk("pktgen: %s stopping all device\n", t->name)); + PG_DEBUG(printk("pktgen: %s stopping all device\n", t->tsk->comm)); pktgen_stop(t); - PG_DEBUG(printk("pktgen: %s removing all device\n", t->name)); + PG_DEBUG(printk("pktgen: %s removing all device\n", t->tsk->comm)); pktgen_rem_all_ifs(t); - PG_DEBUG(printk("pktgen: %s removing thread.\n", t->name)); + PG_DEBUG(printk("pktgen: %s removing thread.\n", t->tsk->comm)); pktgen_rem_thread(t); - t->removed = 1; + return 0; } static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, @@ -3495,37 +3463,11 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) return add_dev_to_thread(t, pkt_dev); } -static struct pktgen_thread *__init pktgen_find_thread(const char *name) +static int __init pktgen_create_thread(int cpu) { struct pktgen_thread *t; - - mutex_lock(&pktgen_thread_lock); - - list_for_each_entry(t, &pktgen_threads, th_list) - if (strcmp(t->name, name) == 0) { - mutex_unlock(&pktgen_thread_lock); - return t; - } - - mutex_unlock(&pktgen_thread_lock); - return NULL; -} - -static int __init pktgen_create_thread(const char *name, int cpu) -{ - int err; - struct pktgen_thread *t = NULL; struct proc_dir_entry *pe; - - if (strlen(name) > 31) { - printk("pktgen: ERROR: Thread name cannot be more than 31 characters.\n"); - return -EINVAL; - } - - if (pktgen_find_thread(name)) { - printk("pktgen: ERROR: thread: %s already exists\n", name); - return -EINVAL; - } + struct task_struct *p; t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); if (!t) { @@ -3533,14 +3475,29 @@ static int __init pktgen_create_thread(const char *name, int cpu) return -ENOMEM; } - strcpy(t->name, name); spin_lock_init(&t->if_lock); t->cpu = cpu; - pe = create_proc_entry(t->name, 0600, pg_proc_dir); + INIT_LIST_HEAD(&t->if_list); + + list_add_tail(&t->th_list, &pktgen_threads); + + p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); + if (IS_ERR(p)) { + printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu); + list_del(&t->th_list); + kfree(t); + return PTR_ERR(p); + } + kthread_bind(p, cpu); + t->tsk = p; + + pe = create_proc_entry(t->tsk->comm, 0600, pg_proc_dir); if (!pe) { printk("pktgen: cannot create %s/%s procfs entry.\n", - PG_PROC_DIR, t->name); + PG_PROC_DIR, t->tsk->comm); + kthread_stop(p); + list_del(&t->th_list); kfree(t); return -EINVAL; } @@ -3548,21 +3505,7 @@ static int __init pktgen_create_thread(const char *name, int cpu) pe->proc_fops = &pktgen_thread_fops; pe->data = t; - INIT_LIST_HEAD(&t->if_list); - - list_add_tail(&t->th_list, &pktgen_threads); - - t->removed = 0; - - err = kernel_thread((void *)pktgen_thread_worker, (void *)t, - CLONE_FS | CLONE_FILES | CLONE_SIGHAND); - if (err < 0) { - printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu); - remove_proc_entry(t->name, pg_proc_dir); - list_del(&t->th_list); - kfree(t); - return err; - } + wake_up_process(p); return 0; } @@ -3643,10 +3586,8 @@ static int __init pg_init(void) for_each_online_cpu(cpu) { int err; - char buf[30]; - sprintf(buf, "kpktgend_%i", cpu); - err = pktgen_create_thread(buf, cpu); + err = pktgen_create_thread(cpu); if (err) printk("pktgen: WARNING: Cannot create thread for cpu %d (%d)\n", cpu, err); @@ -3674,9 +3615,8 @@ static void __exit pg_cleanup(void) list_for_each_safe(q, n, &pktgen_threads) { t = list_entry(q, struct pktgen_thread, th_list); - t->control |= (T_TERMINATE); - - wait_event_interruptible_timeout(queue, (t->removed == 1), HZ); + kthread_stop(t->tsk); + kfree(t); } /* Un-register us from receiving netdevice events */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 84bed40273a..25c8a42965d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -165,9 +165,8 @@ struct in_device *inetdev_init(struct net_device *dev) NET_IPV4_NEIGH, "ipv4", NULL, NULL); #endif - /* Account for reference dev->ip_ptr */ + /* Account for reference dev->ip_ptr (below) */ in_dev_hold(in_dev); - rcu_assign_pointer(dev->ip_ptr, in_dev); #ifdef CONFIG_SYSCTL devinet_sysctl_register(in_dev, &in_dev->cnf); @@ -176,6 +175,8 @@ struct in_device *inetdev_init(struct net_device *dev) if (dev->flags & IFF_UP) ip_mc_up(in_dev); out: + /* we can receive as soon as ip_ptr is set -- do this last */ + rcu_assign_pointer(dev->ip_ptr, in_dev); return in_dev; out_kfree: kfree(in_dev); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index a68966059b5..c47ce7076bd 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -15,16 +15,19 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) struct flowi fl = {}; struct dst_entry *odst; unsigned int hh_len; + unsigned int type; + type = inet_addr_type(iph->saddr); if (addr_type == RTN_UNSPEC) - addr_type = inet_addr_type(iph->saddr); + addr_type = type; /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. */ if (addr_type == RTN_LOCAL) { fl.nl_u.ip4_u.daddr = iph->daddr; - fl.nl_u.ip4_u.saddr = iph->saddr; + if (type == RTN_LOCAL) + fl.nl_u.ip4_u.saddr = iph->saddr; fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; fl.mark = (*pskb)->mark; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f6026d4ac42..47bd3ad18b7 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -6,8 +6,8 @@ menu "IP: Netfilter Configuration" depends on INET && NETFILTER config NF_CONNTRACK_IPV4 - tristate "IPv4 connection tracking support (required for NAT) (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "IPv4 connection tracking support (required for NAT)" + depends on NF_CONNTRACK ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 09696f16aa9..fc1f153c86b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -919,13 +919,13 @@ copy_entries_to_user(unsigned int total_size, #ifdef CONFIG_COMPAT struct compat_delta { struct compat_delta *next; - u_int16_t offset; + unsigned int offset; short delta; }; static struct compat_delta *compat_offsets = NULL; -static int compat_add_offset(u_int16_t offset, short delta) +static int compat_add_offset(unsigned int offset, short delta) { struct compat_delta *tmp; @@ -957,7 +957,7 @@ static void compat_flush_offsets(void) } } -static short compat_calc_jump(u_int16_t offset) +static short compat_calc_jump(unsigned int offset) { struct compat_delta *tmp; short delta; @@ -997,7 +997,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info, void *base, struct xt_table_info *newinfo) { struct ipt_entry_target *t; - u_int16_t entry_offset; + unsigned int entry_offset; int off, i, ret; off = 0; @@ -1467,7 +1467,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, { struct ipt_entry_target *t; struct ipt_target *target; - u_int16_t entry_offset; + unsigned int entry_offset; int ret, off, h, j; duprintf("check_compat_entry_size_and_hooks %p\n", e); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 28b9233956b..d669685afd0 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -127,10 +127,13 @@ masquerade_target(struct sk_buff **pskb, static inline int device_cmp(struct ip_conntrack *i, void *ifindex) { + int ret; #ifdef CONFIG_NF_NAT_NEEDED struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; #endif - int ret; read_lock_bh(&masq_lock); #ifdef CONFIG_NF_NAT_NEEDED diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9b0a9064315..171e5b55d7d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -413,8 +413,6 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) if (netif_carrier_ok(dev)) ndev->if_flags |= IF_READY; - /* protected by rtnl_lock */ - rcu_assign_pointer(dev->ip6_ptr, ndev); ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; @@ -425,6 +423,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) NULL); addrconf_sysctl_register(ndev, &ndev->cnf); #endif + /* protected by rtnl_lock */ + rcu_assign_pointer(dev->ip6_ptr, ndev); return ndev; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 1b853c34d30..cd10e44db01 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -44,8 +44,7 @@ choice depends on NF_CONNTRACK_ENABLED config NF_CONNTRACK_SUPPORT - bool "Layer 3 Independent Connection tracking (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "Layer 3 Independent Connection tracking" help Layer 3 independent connection tracking is experimental scheme which generalize ip_conntrack to support other layer 3 protocols. @@ -122,7 +121,7 @@ config NF_CONNTRACK_EVENTS config NF_CT_PROTO_GRE tristate - depends on EXPERIMENTAL && NF_CONNTRACK + depends on NF_CONNTRACK config NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' @@ -136,8 +135,8 @@ config NF_CT_PROTO_SCTP Documentation/modules.txt. If unsure, say `N'. config NF_CONNTRACK_AMANDA - tristate "Amanda backup protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "Amanda backup protocol support" + depends on NF_CONNTRACK select TEXTSEARCH select TEXTSEARCH_KMP help @@ -151,8 +150,8 @@ config NF_CONNTRACK_AMANDA To compile it as a module, choose M here. If unsure, say N. config NF_CONNTRACK_FTP - tristate "FTP protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "FTP protocol support" + depends on NF_CONNTRACK help Tracking FTP connections is problematic: special helpers are required for tracking them, and doing masquerading and other forms @@ -184,8 +183,8 @@ config NF_CONNTRACK_H323 To compile it as a module, choose M here. If unsure, say N. config NF_CONNTRACK_IRC - tristate "IRC protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "IRC protocol support" + depends on NF_CONNTRACK help There is a commonly-used extension to IRC called Direct Client-to-Client Protocol (DCC). This enables users to send @@ -218,8 +217,8 @@ config NF_CONNTRACK_NETBIOS_NS To compile it as a module, choose M here. If unsure, say N. config NF_CONNTRACK_PPTP - tristate "PPtP protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "PPtP protocol support" + depends on NF_CONNTRACK select NF_CT_PROTO_GRE help This module adds support for PPTP (Point to Point Tunnelling @@ -249,8 +248,8 @@ config NF_CONNTRACK_SIP To compile it as a module, choose M here. If unsure, say N. config NF_CONNTRACK_TFTP - tristate "TFTP protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "TFTP protocol support" + depends on NF_CONNTRACK help TFTP connection tracking helper, this is required depending on how restrictive your ruleset is. diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a5a6e192ac2..f28bf69d3d4 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -745,7 +745,7 @@ static int __init xt_hashlimit_init(void) } hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net); if (!hashlimit_procdir6) { - printk(KERN_ERR "xt_hashlimit: tnable to create proc dir " + printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); goto err4; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 276131fe56d..383dd4e82ee 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -472,8 +472,7 @@ static int netlink_release(struct socket *sock) NETLINK_URELEASE, &n); } - if (nlk->module) - module_put(nlk->module); + module_put(nlk->module); netlink_table_grab(); if (nlk->flags & NETLINK_KERNEL_SOCKET) { diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 52a2726d327..b5c80b18990 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -484,8 +484,6 @@ out: return sk; } -void x25_init_timers(struct sock *sk); - static int x25_create(struct socket *sock, int protocol) { struct sock *sk; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e5372b11fc8..82f36d396fc 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -434,18 +434,19 @@ error_no_put: return NULL; } -static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_usersa_info *p = NLMSG_DATA(nlh); struct xfrm_state *x; int err; struct km_event c; - err = verify_newsa_info(p, (struct rtattr **)xfrma); + err = verify_newsa_info(p, xfrma); if (err) return err; - x = xfrm_state_construct(p, (struct rtattr **)xfrma, &err); + x = xfrm_state_construct(p, xfrma, &err); if (!x) return err; @@ -507,14 +508,15 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, return x; } -static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_state *x; int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = NLMSG_DATA(nlh); - x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); + x = xfrm_user_state_lookup(p, xfrma, &err); if (x == NULL) return err; @@ -672,14 +674,15 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, return skb; } -static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_usersa_id *p = NLMSG_DATA(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; int err = -ESRCH; - x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); + x = xfrm_user_state_lookup(p, xfrma, &err); if (x == NULL) goto out_noput; @@ -718,7 +721,8 @@ static int verify_userspi_info(struct xfrm_userspi_info *p) return 0; } -static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_state *x; struct xfrm_userspi_info *p; @@ -1013,7 +1017,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, return NULL; } -static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); struct xfrm_policy *xp; @@ -1024,11 +1029,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr err = verify_newpolicy_info(p); if (err) return err; - err = verify_sec_ctx_len((struct rtattr **)xfrma); + err = verify_sec_ctx_len(xfrma); if (err) return err; - xp = xfrm_policy_construct(p, (struct rtattr **)xfrma, &err); + xp = xfrm_policy_construct(p, xfrma, &err); if (!xp) return err; @@ -1227,7 +1232,8 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, return skb; } -static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; @@ -1239,7 +1245,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr p = NLMSG_DATA(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; - err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + err = copy_from_user_policy_type(&type, xfrma); if (err) return err; @@ -1250,11 +1256,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr if (p->index) xp = xfrm_policy_byid(type, p->dir, p->index, delete); else { - struct rtattr **rtattrs = (struct rtattr **)xfrma; - struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; + struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; struct xfrm_policy tmp; - err = verify_sec_ctx_len(rtattrs); + err = verify_sec_ctx_len(xfrma); if (err) return err; @@ -1302,7 +1307,8 @@ out: return err; } -static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct km_event c; struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); @@ -1367,7 +1373,8 @@ nlmsg_failure: return -1; } -static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_state *x; struct sk_buff *r_skb; @@ -1415,7 +1422,8 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) return err; } -static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_state *x; struct km_event c; @@ -1439,7 +1447,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) goto out; spin_lock_bh(&x->lock); - err = xfrm_update_ae_params(x,(struct rtattr **)xfrma); + err = xfrm_update_ae_params(x, xfrma); spin_unlock_bh(&x->lock); if (err < 0) goto out; @@ -1455,14 +1463,15 @@ out: return err; } -static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct xfrm_audit audit_info; - err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + err = copy_from_user_policy_type(&type, xfrma); if (err) return err; @@ -1477,7 +1486,8 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **x return 0; } -static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_policy *xp; struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); @@ -1485,18 +1495,17 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; - err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + err = copy_from_user_policy_type(&type, xfrma); if (err) return err; if (p->index) xp = xfrm_policy_byid(type, p->dir, p->index, 0); else { - struct rtattr **rtattrs = (struct rtattr **)xfrma; - struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; + struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; struct xfrm_policy tmp; - err = verify_sec_ctx_len(rtattrs); + err = verify_sec_ctx_len(xfrma); if (err) return err; @@ -1537,7 +1546,8 @@ out: return err; } -static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_state *x; int err; @@ -1568,7 +1578,8 @@ out: return err; } -static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_policy *xp; struct xfrm_user_tmpl *ut; @@ -1647,7 +1658,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { #undef XMSGSIZE static struct xfrm_link { - int (*doit)(struct sk_buff *, struct nlmsghdr *, void **); + int (*doit)(struct sk_buff *, struct nlmsghdr *, struct rtattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); } xfrm_dispatch[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, @@ -1735,7 +1746,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err if (link->doit == NULL) goto err_einval; - *errp = link->doit(skb, nlh, (void **) &xfrma); + *errp = link->doit(skb, nlh, xfrma); return *errp; diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 0b2fcc417f5..a8ffc329666 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -925,6 +925,8 @@ ConfigInfoView::ConfigInfoView(QWidget* parent, const char *name) configSettings->endGroup(); connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); } + + has_dbg_info = 0; } void ConfigInfoView::saveSettings(void) @@ -953,10 +955,13 @@ void ConfigInfoView::setInfo(struct menu *m) if (menu == m) return; menu = m; - if (!menu) + if (!menu) { + has_dbg_info = 0; clear(); - else + } else { + has_dbg_info = 1; menuInfo(); + } } void ConfigInfoView::setSource(const QString& name) @@ -991,6 +996,9 @@ void ConfigInfoView::symbolInfo(void) { QString str; + if (!has_dbg_info) + return; + str += "<big>Symbol: <b>"; str += print_filter(sym->name); str += "</b></big><br><br>value: "; diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 6fc1c5f1442..a397edb5adc 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -273,6 +273,8 @@ protected: struct symbol *sym; struct menu *menu; bool _showDebug; + + int has_dbg_info; }; class ConfigSearchWindow : public QDialog { diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index bdb7070dd3d..ee058155796 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2660,9 +2660,11 @@ int selinux_netlbl_inode_permission(struct inode *inode, int mask) rcu_read_unlock(); return 0; } - lock_sock(sock->sk); + local_bh_disable(); + bh_lock_sock_nested(sock->sk); rc = selinux_netlbl_socket_setsid(sock, sksec->sid); - release_sock(sock->sk); + bh_unlock_sock(sock->sk); + local_bh_enable(); rcu_read_unlock(); return rc; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 29e4c48151b..4e0c3c1b908 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5070,6 +5070,8 @@ static struct hda_board_config alc883_cfg_tbl[] = { { .modelname = "6stack-dig", .config = ALC883_6ST_DIG }, { .pci_subvendor = 0x1462, .pci_subdevice = 0x6668, .config = ALC883_6ST_DIG }, /* MSI */ + { .pci_subvendor = 0x1462, .pci_subdevice = 0x7280, + .config = ALC883_6ST_DIG }, /* MSI K9A Platinum (MS-7280) */ { .pci_subvendor = 0x105b, .pci_subdevice = 0x6668, .config = ALC883_6ST_DIG }, /* Foxconn */ { .modelname = "6stack-dig-demo", .config = ALC888_DEMO_BOARD }, diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c index edeb3d3c4c7..f5956d557f7 100644 --- a/sound/sparc/cs4231.c +++ b/sound/sparc/cs4231.c @@ -1268,7 +1268,7 @@ static struct snd_pcm_hardware snd_cs4231_playback = .channels_min = 1, .channels_max = 2, .buffer_bytes_max = (32*1024), - .period_bytes_min = 4096, + .period_bytes_min = 64, .period_bytes_max = (32*1024), .periods_min = 1, .periods_max = 1024, @@ -1288,7 +1288,7 @@ static struct snd_pcm_hardware snd_cs4231_capture = .channels_min = 1, .channels_max = 2, .buffer_bytes_max = (32*1024), - .period_bytes_min = 4096, + .period_bytes_min = 64, .period_bytes_max = (32*1024), .periods_min = 1, .periods_max = 1024, @@ -1796,7 +1796,7 @@ static irqreturn_t snd_cs4231_sbus_interrupt(int irq, void *dev_id) snd_cs4231_outm(chip, CS4231_IRQ_STATUS, ~CS4231_ALL_IRQS | ~status, 0); spin_unlock_irqrestore(&chip->lock, flags); - return 0; + return IRQ_HANDLED; } /* @@ -1821,7 +1821,6 @@ static int sbus_dma_request(struct cs4231_dma_control *dma_cont, dma_addr_t bus_ if (!(csr & test)) goto out; err = -EBUSY; - csr = sbus_readl(base->regs + APCCSR); test = APC_XINT_CNVA; if ( base->dir == APC_PLAY ) test = APC_XINT_PNVA; @@ -1862,17 +1861,16 @@ static void sbus_dma_enable(struct cs4231_dma_control *dma_cont, int on) spin_lock_irqsave(&base->lock, flags); if (!on) { - if (base->dir == APC_PLAY) { - sbus_writel(0, base->regs + base->dir + APCNVA); - sbus_writel(1, base->regs + base->dir + APCC); - } - else - { - sbus_writel(0, base->regs + base->dir + APCNC); - sbus_writel(0, base->regs + base->dir + APCVA); - } + sbus_writel(0, base->regs + base->dir + APCNC); + sbus_writel(0, base->regs + base->dir + APCNVA); + sbus_writel(0, base->regs + base->dir + APCC); + sbus_writel(0, base->regs + base->dir + APCVA); + + /* ACK any APC interrupts. */ + csr = sbus_readl(base->regs + APCCSR); + sbus_writel(csr, base->regs + APCCSR); } - udelay(600); + udelay(1000); csr = sbus_readl(base->regs + APCCSR); shift = 0; if ( base->dir == APC_PLAY ) |