aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/auxdisplay/cfag12864b-example.c1
-rw-r--r--Documentation/cgroups/cgroups.txt32
-rw-r--r--Documentation/cgroups/memory.txt41
-rw-r--r--Documentation/crypto/async-tx-api.txt75
-rw-r--r--Documentation/feature-removal-schedule.txt8
-rw-r--r--Documentation/filesystems/9p.txt40
-rw-r--r--Documentation/filesystems/sharedsubtree.txt220
-rw-r--r--Documentation/filesystems/vfs.txt7
-rw-r--r--Documentation/hwmon/coretemp4
-rw-r--r--Documentation/hwmon/fscher169
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--Documentation/kbuild/kbuild.txt16
-rw-r--r--Documentation/kbuild/makefiles.txt20
-rw-r--r--Documentation/sysctl/fs.txt17
-rw-r--r--Documentation/sysctl/kernel.txt22
-rw-r--r--Documentation/sysctl/vm.txt41
-rw-r--r--Documentation/vm/.gitignore1
-rw-r--r--Documentation/vm/locking2
-rw-r--r--Documentation/vm/page-types.c200
-rw-r--r--MAINTAINERS23
-rw-r--r--Makefile62
-rw-r--r--arch/alpha/include/asm/fcntl.h2
-rw-r--r--arch/alpha/include/asm/smp.h2
-rw-r--r--arch/alpha/include/asm/topology.h18
-rw-r--r--arch/alpha/kernel/core_marvel.c2
-rw-r--r--arch/alpha/kernel/core_titan.c2
-rw-r--r--arch/alpha/kernel/pci_impl.h2
-rw-r--r--arch/alpha/kernel/pci_iommu.c4
-rw-r--r--arch/alpha/kernel/process.c1
-rw-r--r--arch/alpha/kernel/smp.c14
-rw-r--r--arch/arm/Makefile6
-rw-r--r--arch/arm/boot/install.sh4
-rw-r--r--arch/arm/include/asm/cacheflush.h8
-rw-r--r--arch/arm/include/asm/hardware/iop3xx-adma.h81
-rw-r--r--arch/arm/include/asm/hardware/iop_adma.h3
-rw-r--r--arch/arm/include/asm/mmu_context.h7
-rw-r--r--arch/arm/include/asm/smp.h1
-rw-r--r--arch/arm/include/asm/tlbflush.h4
-rw-r--r--arch/arm/kernel/Makefile3
-rw-r--r--arch/arm/kernel/init_task.c5
-rw-r--r--arch/arm/kernel/smp.c10
-rw-r--r--arch/arm/kernel/sys_arm.c1
-rw-r--r--arch/arm/mach-at91/at91sam9263_devices.c36
-rw-r--r--arch/arm/mach-at91/board-sam9263ek.c19
-rw-r--r--arch/arm/mach-at91/include/mach/board.h6
-rw-r--r--arch/arm/mach-iop13xx/include/mach/adma.h119
-rw-r--r--arch/arm/mach-iop13xx/setup.c17
-rw-r--r--arch/arm/mm/context.c2
-rw-r--r--arch/arm/mm/flush.c10
-rw-r--r--arch/arm/plat-iop/adma.c4
-rw-r--r--arch/avr32/kernel/init_task.c5
-rw-r--r--arch/avr32/mm/init.c4
-rw-r--r--arch/blackfin/Makefile4
-rw-r--r--arch/blackfin/boot/install.sh6
-rw-r--r--arch/cris/Makefile2
-rw-r--r--arch/cris/kernel/Makefile1
-rw-r--r--arch/cris/kernel/process.c5
-rw-r--r--arch/frv/kernel/init_task.c5
-rw-r--r--arch/frv/kernel/pm.c14
-rw-r--r--arch/frv/kernel/sys_frv.c1
-rw-r--r--arch/h8300/kernel/init_task.c5
-rw-r--r--arch/h8300/kernel/sys_h8300.c1
-rw-r--r--arch/ia64/include/asm/smp.h1
-rw-r--r--arch/ia64/include/asm/topology.h3
-rw-r--r--arch/ia64/install.sh4
-rw-r--r--arch/ia64/kernel/Makefile.gate2
-rw-r--r--arch/ia64/kernel/init_task.c3
-rw-r--r--arch/ia64/kernel/smp.c2
-rw-r--r--arch/m32r/boot/compressed/install.sh4
-rw-r--r--arch/m32r/include/asm/mmu_context.h4
-rw-r--r--arch/m32r/include/asm/smp.h2
-rw-r--r--arch/m32r/kernel/init_task.c5
-rw-r--r--arch/m32r/kernel/smp.c30
-rw-r--r--arch/m32r/kernel/smpboot.c2
-rw-r--r--arch/m68k/install.sh4
-rw-r--r--arch/m68k/kernel/process.c6
-rw-r--r--arch/m68k/kernel/sys_m68k.c1
-rw-r--r--arch/m68knommu/kernel/init_task.c5
-rw-r--r--arch/m68knommu/kernel/sys_m68k.c1
-rw-r--r--arch/microblaze/Kconfig1
-rw-r--r--arch/microblaze/Makefile31
-rw-r--r--arch/microblaze/boot/Makefile41
l---------arch/microblaze/boot/dts/system.dts1
-rw-r--r--arch/microblaze/boot/linked_dtb.S3
-rw-r--r--arch/microblaze/configs/mmu_defconfig30
-rw-r--r--arch/microblaze/configs/nommu_defconfig35
-rw-r--r--arch/microblaze/include/asm/asm-compat.h17
-rw-r--r--arch/microblaze/include/asm/io.h3
-rw-r--r--arch/microblaze/include/asm/ipc.h1
-rw-r--r--arch/microblaze/include/asm/page.h3
-rw-r--r--arch/microblaze/include/asm/setup.h2
-rw-r--r--arch/microblaze/include/asm/syscall.h99
-rw-r--r--arch/microblaze/kernel/cpu/cpuinfo.c3
-rw-r--r--arch/microblaze/kernel/entry.S72
-rw-r--r--arch/microblaze/kernel/exceptions.c33
-rw-r--r--arch/microblaze/kernel/head.S14
-rw-r--r--arch/microblaze/kernel/hw_exception_handler.S10
-rw-r--r--arch/microblaze/kernel/init_task.c5
-rw-r--r--arch/microblaze/kernel/process.c1
-rw-r--r--arch/microblaze/kernel/ptrace.c62
-rw-r--r--arch/microblaze/kernel/setup.c12
-rw-r--r--arch/microblaze/kernel/sys_microblaze.c1
-rw-r--r--arch/microblaze/kernel/vmlinux.lds.S72
-rw-r--r--arch/microblaze/mm/init.c3
-rw-r--r--arch/mips/Makefile27
-rw-r--r--arch/mips/alchemy/common/time.c2
-rw-r--r--arch/mips/include/asm/mach-ip27/topology.h2
-rw-r--r--arch/mips/include/asm/mmu_context.h10
-rw-r--r--arch/mips/include/asm/smp-ops.h2
-rw-r--r--arch/mips/include/asm/smp.h2
-rw-r--r--arch/mips/kernel/init_task.c5
-rw-r--r--arch/mips/kernel/smp-cmp.c6
-rw-r--r--arch/mips/kernel/smp-mt.c6
-rw-r--r--arch/mips/kernel/smp-up.c3
-rw-r--r--arch/mips/kernel/smp.c8
-rw-r--r--arch/mips/kernel/smtc.c6
-rw-r--r--arch/mips/kernel/vmlinux.lds.S13
-rw-r--r--arch/mips/lasat/sysctl.c18
-rw-r--r--arch/mips/mipssim/sim_smtc.c5
-rw-r--r--arch/mips/mm/c-octeon.c2
-rw-r--r--arch/mips/mti-malta/malta-smtc.c4
-rw-r--r--arch/mips/pmc-sierra/yosemite/smp.c4
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-smp.c4
-rw-r--r--arch/mips/sibyte/bcm1480/smp.c5
-rw-r--r--arch/mips/sibyte/sb1250/smp.c5
-rw-r--r--arch/mn10300/include/asm/mmu_context.h12
-rw-r--r--arch/mn10300/kernel/init_task.c5
-rw-r--r--arch/mn10300/kernel/sys_mn10300.c1
-rw-r--r--arch/parisc/Makefile4
-rw-r--r--arch/parisc/include/asm/fcntl.h2
-rw-r--r--arch/parisc/include/asm/smp.h1
-rw-r--r--arch/parisc/install.sh4
-rw-r--r--arch/parisc/kernel/init_task.c4
-rw-r--r--arch/parisc/kernel/sys_parisc32.c1
-rw-r--r--arch/powerpc/Makefile6
-rw-r--r--arch/powerpc/boot/install.sh4
-rw-r--r--arch/powerpc/include/asm/fsldma.h136
-rw-r--r--arch/powerpc/include/asm/smp.h2
-rw-r--r--arch/powerpc/include/asm/topology.h12
-rw-r--r--arch/powerpc/kernel/init_task.c5
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c5
-rw-r--r--arch/powerpc/kernel/setup-common.c7
-rw-r--r--arch/powerpc/kernel/smp.c12
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c1
-rw-r--r--arch/powerpc/kernel/vdso.c3
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile2
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32_wrapper.S3
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile2
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64_wrapper.S3
-rw-r--r--arch/powerpc/platforms/powermac/smp.c6
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c6
-rw-r--r--arch/s390/appldata/appldata_base.c9
-rw-r--r--arch/s390/boot/install.sh4
-rw-r--r--arch/s390/include/asm/smp.h2
-rw-r--r--arch/s390/include/asm/topology.h1
-rw-r--r--arch/s390/kernel/compat_linux.c1
-rw-r--r--arch/s390/kernel/debug.c4
-rw-r--r--arch/s390/kernel/init_task.c5
-rw-r--r--arch/s390/kernel/process.c1
-rw-r--r--arch/s390/kernel/smp.c4
-rw-r--r--arch/s390/kernel/vdso.c2
-rw-r--r--arch/s390/kernel/vdso32/Makefile2
-rw-r--r--arch/s390/kernel/vdso32/vdso32_wrapper.S3
-rw-r--r--arch/s390/kernel/vdso64/Makefile2
-rw-r--r--arch/s390/kernel/vdso64/vdso64_wrapper.S3
-rw-r--r--arch/s390/mm/cmm.c4
-rw-r--r--arch/score/kernel/init_task.c5
-rw-r--r--arch/sh/boot/compressed/install.sh4
-rw-r--r--arch/sh/drivers/dma/Kconfig12
-rw-r--r--arch/sh/drivers/dma/Makefile3
-rw-r--r--arch/sh/include/asm/dma-sh.h13
-rw-r--r--arch/sh/include/asm/smp.h1
-rw-r--r--arch/sh/include/asm/topology.h1
-rw-r--r--arch/sh/kernel/init_task.c5
-rw-r--r--arch/sh/kernel/irq.c6
-rw-r--r--arch/sh/kernel/sys_sh32.c1
-rw-r--r--arch/sh/kernel/sys_sh64.c1
-rw-r--r--arch/sh/kernel/vsyscall/Makefile2
-rw-r--r--arch/sparc/Makefile4
-rw-r--r--arch/sparc/include/asm/smp_64.h1
-rw-r--r--arch/sparc/include/asm/topology_64.h16
-rw-r--r--arch/sparc/kernel/Makefile6
-rw-r--r--arch/sparc/kernel/init_task.c5
-rw-r--r--arch/sparc/kernel/sys_sparc32.c1
-rw-r--r--arch/sparc/kernel/systbls.h3
-rw-r--r--arch/um/Makefile9
-rw-r--r--arch/um/include/asm/mmu_context.h4
-rw-r--r--arch/um/kernel/Makefile3
-rw-r--r--arch/um/kernel/init_task.c5
-rw-r--r--arch/um/kernel/smp.c2
-rw-r--r--arch/um/kernel/vmlinux.lds.S3
-rw-r--r--arch/x86/Makefile4
-rw-r--r--arch/x86/boot/install.sh4
-rw-r--r--arch/x86/include/asm/cache.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h6
-rw-r--r--arch/x86/include/asm/nmi.h3
-rw-r--r--arch/x86/include/asm/pci.h6
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/kernel/apic/io_apic.c7
-rw-r--r--arch/x86/kernel/apic/nmi.c4
-rw-r--r--arch/x86/kernel/dumpstack_32.c1
-rw-r--r--arch/x86/kernel/dumpstack_64.c1
-rw-r--r--arch/x86/kernel/head_32.S4
-rw-r--r--arch/x86/kernel/head_64.S2
-rw-r--r--arch/x86/kernel/init_task.c5
-rw-r--r--arch/x86/kernel/ldt.c4
-rw-r--r--arch/x86/kernel/process.c6
-rw-r--r--arch/x86/kernel/smpboot.c9
-rw-r--r--arch/x86/kernel/time.c1
-rw-r--r--arch/x86/kernel/traps.c1
-rw-r--r--arch/x86/kernel/vsyscall_64.c10
-rw-r--r--arch/x86/mm/fault.c19
-rw-r--r--arch/x86/mm/pageattr.c1
-rw-r--r--arch/x86/mm/tlb.c15
-rw-r--r--arch/x86/pci/common.c2
-rw-r--r--arch/x86/vdso/Makefile2
-rw-r--r--arch/x86/xen/mmu.c4
-rw-r--r--arch/xtensa/kernel/Makefile3
-rw-r--r--arch/xtensa/kernel/head.S2
-rw-r--r--arch/xtensa/kernel/init_task.c5
-rw-r--r--crypto/async_tx/Kconfig9
-rw-r--r--crypto/async_tx/Makefile3
-rw-r--r--crypto/async_tx/async_memcpy.c44
-rw-r--r--crypto/async_tx/async_memset.c43
-rw-r--r--crypto/async_tx/async_pq.c395
-rw-r--r--crypto/async_tx/async_raid6_recov.c468
-rw-r--r--crypto/async_tx/async_tx.c87
-rw-r--r--crypto/async_tx/async_xor.c207
-rw-r--r--crypto/async_tx/raid6test.c240
-rw-r--r--drivers/acpi/button.c45
-rw-r--r--drivers/acpi/osl.c2
-rw-r--r--drivers/acpi/processor_perflib.c3
-rw-r--r--drivers/acpi/processor_throttling.c3
-rw-r--r--drivers/atm/he.c59
-rw-r--r--drivers/atm/solos-attrlist.c11
-rw-r--r--drivers/atm/solos-pci.c75
-rw-r--r--drivers/cdrom/cdrom.c8
-rw-r--r--drivers/char/Kconfig8
-rw-r--r--drivers/char/Makefile1
-rw-r--r--drivers/char/agp/intel-agp.c37
-rw-r--r--drivers/char/bfin-otp.c173
-rw-r--r--drivers/char/hpet.c21
-rw-r--r--drivers/char/mem.c2
-rw-r--r--drivers/char/mwave/mwavedd.c22
-rw-r--r--drivers/char/random.c4
-rw-r--r--drivers/char/rio/rioctrl.c2
-rw-r--r--drivers/char/tpm/tpm.c5
-rw-r--r--drivers/char/uv_mmtimer.c216
-rw-r--r--drivers/dca/dca-core.c124
-rw-r--r--drivers/dma/Kconfig14
-rw-r--r--drivers/dma/Makefile4
-rw-r--r--drivers/dma/at_hdmac.c60
-rw-r--r--drivers/dma/at_hdmac_regs.h1
-rw-r--r--drivers/dma/dmaengine.c94
-rw-r--r--drivers/dma/dmatest.c40
-rw-r--r--drivers/dma/dw_dmac.c50
-rw-r--r--drivers/dma/dw_dmac_regs.h1
-rw-r--r--drivers/dma/fsldma.c288
-rw-r--r--drivers/dma/fsldma.h4
-rw-r--r--drivers/dma/ioat.c202
-rw-r--r--drivers/dma/ioat/Makefile2
-rw-r--r--drivers/dma/ioat/dca.c (renamed from drivers/dma/ioat_dca.c)13
-rw-r--r--drivers/dma/ioat/dma.c1238
-rw-r--r--drivers/dma/ioat/dma.h337
-rw-r--r--drivers/dma/ioat/dma_v2.c871
-rw-r--r--drivers/dma/ioat/dma_v2.h190
-rw-r--r--drivers/dma/ioat/dma_v3.c1223
-rw-r--r--drivers/dma/ioat/hw.h215
-rw-r--r--drivers/dma/ioat/pci.c210
-rw-r--r--drivers/dma/ioat/registers.h (renamed from drivers/dma/ioatdma_registers.h)54
-rw-r--r--drivers/dma/ioat_dma.c1741
-rw-r--r--drivers/dma/ioatdma.h165
-rw-r--r--drivers/dma/ioatdma_hw.h70
-rw-r--r--drivers/dma/iop-adma.c491
-rw-r--r--drivers/dma/iovlock.c10
-rw-r--r--drivers/dma/mv_xor.c7
-rw-r--r--drivers/dma/mv_xor.h4
-rw-r--r--drivers/dma/shdma.c786
-rw-r--r--drivers/dma/shdma.h64
-rw-r--r--drivers/dma/txx9dmac.c24
-rw-r--r--drivers/dma/txx9dmac.h1
-rw-r--r--drivers/edac/Kconfig13
-rw-r--r--drivers/edac/Makefile2
-rw-r--r--drivers/edac/cpc925_edac.c6
-rw-r--r--drivers/edac/edac_device.c5
-rw-r--r--drivers/edac/edac_mc.c4
-rw-r--r--drivers/edac/edac_pci.c4
-rw-r--r--drivers/edac/i3200_edac.c527
-rw-r--r--drivers/edac/mpc85xx_edac.c30
-rw-r--r--drivers/edac/mv64x60_edac.c22
-rw-r--r--drivers/gpu/drm/Kconfig1
-rw-r--r--drivers/gpu/drm/drm_gem.c13
-rw-r--r--drivers/gpu/drm/i915/Makefile1
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c6
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c194
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c133
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h77
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c876
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c92
-rw-r--r--drivers/gpu/drm/i915/i915_opregion.c22
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h34
-rw-r--r--drivers/gpu/drm/i915/i915_suspend.c170
-rw-r--r--drivers/gpu/drm/i915/i915_trace.h315
-rw-r--r--drivers/gpu/drm/i915/i915_trace_points.c11
-rw-r--r--drivers/gpu/drm/i915/intel_bios.c3
-rw-r--r--drivers/gpu/drm/i915/intel_crt.c9
-rw-r--r--drivers/gpu/drm/i915/intel_display.c616
-rw-r--r--drivers/gpu/drm/i915/intel_drv.h5
-rw-r--r--drivers/gpu/drm/i915/intel_lvds.c63
-rw-r--r--drivers/gpu/drm/i915/intel_sdvo.c502
-rw-r--r--drivers/hwmon/Kconfig34
-rw-r--r--drivers/hwmon/Makefile2
-rw-r--r--drivers/hwmon/adm1031.c40
-rw-r--r--drivers/hwmon/coretemp.c57
-rw-r--r--drivers/hwmon/fscher.c680
-rw-r--r--drivers/hwmon/fscpos.c654
-rw-r--r--drivers/hwmon/ltc4215.c2
-rw-r--r--drivers/hwmon/ltc4245.c3
-rw-r--r--drivers/idle/i7300_idle.c20
-rw-r--r--drivers/input/input.c64
-rw-r--r--drivers/input/keyboard/Kconfig40
-rw-r--r--drivers/input/keyboard/Makefile4
-rw-r--r--drivers/input/keyboard/adp5588-keys.c361
-rw-r--r--drivers/input/keyboard/atkbd.c25
-rw-r--r--drivers/input/keyboard/max7359_keypad.c330
-rw-r--r--drivers/input/keyboard/opencores-kbd.c180
-rw-r--r--drivers/input/keyboard/qt2160.c397
-rw-r--r--drivers/input/misc/Kconfig1
-rw-r--r--drivers/input/misc/dm355evm_keys.c26
-rw-r--r--drivers/input/mouse/sentelic.c18
-rw-r--r--drivers/input/mouse/synaptics_i2c.c51
-rw-r--r--drivers/input/serio/i8042.c41
-rw-r--r--drivers/input/serio/libps2.c28
-rw-r--r--drivers/input/touchscreen/Kconfig17
-rw-r--r--drivers/input/touchscreen/Makefile1
-rw-r--r--drivers/input/touchscreen/ad7879.c6
-rw-r--r--drivers/input/touchscreen/mcs5000_ts.c318
-rw-r--r--drivers/leds/leds-clevo-mail.c8
-rw-r--r--drivers/md/Kconfig26
-rw-r--r--drivers/md/bitmap.c5
-rw-r--r--drivers/md/linear.c3
-rw-r--r--drivers/md/md.c25
-rw-r--r--drivers/md/md.h1
-rw-r--r--drivers/md/multipath.c6
-rw-r--r--drivers/md/raid0.c8
-rw-r--r--drivers/md/raid1.c15
-rw-r--r--drivers/md/raid10.c12
-rw-r--r--drivers/md/raid5.c1493
-rw-r--r--drivers/md/raid5.h28
-rw-r--r--drivers/media/dvb/dvb-core/dvbdev.h5
-rw-r--r--drivers/media/dvb/dvb-usb/Kconfig2
-rw-r--r--drivers/media/video/saa7164/saa7164-api.c8
-rw-r--r--drivers/media/video/saa7164/saa7164-cmd.c2
-rw-r--r--drivers/media/video/saa7164/saa7164-core.c6
-rw-r--r--drivers/media/video/saa7164/saa7164.h4
-rw-r--r--drivers/media/video/usbvision/usbvision-core.c1
-rw-r--r--drivers/media/video/usbvision/usbvision-i2c.c1
-rw-r--r--drivers/media/video/usbvision/usbvision-video.c1
-rw-r--r--drivers/memstick/core/memstick.c2
-rw-r--r--drivers/misc/sgi-gru/grukservices.c2
-rw-r--r--drivers/misc/sgi-gru/gruprocfs.c3
-rw-r--r--drivers/mmc/host/atmel-mci.c9
-rw-r--r--drivers/mtd/Kconfig2
-rw-r--r--drivers/mtd/maps/Kconfig2
-rw-r--r--drivers/net/3c59x.c12
-rw-r--r--drivers/net/8139cp.c2
-rw-r--r--drivers/net/Kconfig2
-rw-r--r--drivers/net/atl1c/atl1c_main.c2
-rw-r--r--drivers/net/can/Kconfig13
-rw-r--r--drivers/net/can/Makefile3
-rw-r--r--drivers/net/can/sja1000/ems_pci.c16
-rw-r--r--drivers/net/can/usb/Makefile5
-rw-r--r--drivers/net/can/usb/ems_usb.c1155
-rw-r--r--drivers/net/cnic.c4
-rw-r--r--drivers/net/cpmac.c8
-rw-r--r--drivers/net/ehea/ehea_main.c1
-rw-r--r--drivers/net/igb/e1000_mac.c72
-rw-r--r--drivers/net/igb/e1000_mac.h1
-rw-r--r--drivers/net/ixgbe/ixgbe.h6
-rw-r--r--drivers/net/ixgbe/ixgbe_ethtool.c75
-rw-r--r--drivers/net/ixgbe/ixgbe_main.c111
-rw-r--r--drivers/net/netxen/netxen_nic_main.c8
-rw-r--r--drivers/net/pcmcia/pcnet_cs.c11
-rw-r--r--drivers/net/sfc/efx.c3
-rw-r--r--drivers/net/sky2.c4
-rw-r--r--drivers/net/sunvnet.c1
-rw-r--r--drivers/net/tun.c4
-rw-r--r--drivers/net/usb/kaweth.c18
-rw-r--r--drivers/net/usb/smsc95xx.c67
-rw-r--r--drivers/net/usb/usbnet.c2
-rw-r--r--drivers/net/wireless/arlan-proc.c28
-rw-r--r--drivers/net/wireless/ath/ar9170/usb.c2
-rw-r--r--drivers/net/wireless/ath/ath9k/calib.c23
-rw-r--r--drivers/net/wireless/ath/ath9k/calib.h1
-rw-r--r--drivers/net/wireless/ath/ath9k/eeprom_def.c4
-rw-r--r--drivers/net/wireless/ath/ath9k/hw.c202
-rw-r--r--drivers/net/wireless/ath/ath9k/hw.h4
-rw-r--r--drivers/net/wireless/ath/ath9k/main.c16
-rw-r--r--drivers/net/wireless/ath/ath9k/reg.h3
-rw-r--r--drivers/net/wireless/b43/Kconfig21
-rw-r--r--drivers/net/wireless/b43/Makefile1
-rw-r--r--drivers/net/wireless/b43/b43.h23
-rw-r--r--drivers/net/wireless/b43/debugfs.c1
-rw-r--r--drivers/net/wireless/b43/debugfs.h1
-rw-r--r--drivers/net/wireless/b43/dma.c4
-rw-r--r--drivers/net/wireless/b43/leds.c266
-rw-r--r--drivers/net/wireless/b43/leds.h33
-rw-r--r--drivers/net/wireless/b43/main.c224
-rw-r--r--drivers/net/wireless/b43/phy_lp.c12
-rw-r--r--drivers/net/wireless/b43/pio.c2
-rw-r--r--drivers/net/wireless/b43/rfkill.c2
-rw-r--r--drivers/net/wireless/b43/sdio.c202
-rw-r--r--drivers/net/wireless/b43/sdio.h45
-rw-r--r--drivers/net/wireless/b43/xmit.c5
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-4965.c6
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-5000.c6
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-rx.c10
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-sta.c2
-rw-r--r--drivers/net/wireless/iwlwifi/iwl3945-base.c9
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00lib.h2
-rw-r--r--drivers/net/wireless/wl12xx/Kconfig2
-rw-r--r--drivers/net/wireless/zd1211rw/zd_usb.c2
-rw-r--r--drivers/net/xilinx_emaclite.c7
-rw-r--r--drivers/oprofile/buffer_sync.c3
-rw-r--r--drivers/parport/procfs.c12
-rw-r--r--drivers/pci/hotplug/pciehp.h107
-rw-r--r--drivers/pci/hotplug/pciehp_acpi.c17
-rw-r--r--drivers/pci/hotplug/pciehp_core.c136
-rw-r--r--drivers/pci/hotplug/pciehp_ctrl.c109
-rw-r--r--drivers/pci/hotplug/pciehp_hpc.c109
-rw-r--r--drivers/pci/hotplug/pciehp_pci.c23
-rw-r--r--drivers/pci/pcie/aer/aerdrv.c2
-rw-r--r--drivers/pci/pcie/aspm.c3
-rw-r--r--drivers/platform/x86/acer-wmi.c2
-rw-r--r--drivers/s390/char/zcore.c1
-rw-r--r--drivers/staging/Kconfig2
-rw-r--r--drivers/staging/Makefile1
-rw-r--r--drivers/staging/cpc-usb/Kconfig4
-rw-r--r--drivers/staging/cpc-usb/Makefile3
-rw-r--r--drivers/staging/cpc-usb/TODO10
-rw-r--r--drivers/staging/cpc-usb/cpc-usb_drv.c1184
-rw-r--r--drivers/staging/cpc-usb/cpc.h417
-rw-r--r--drivers/staging/cpc-usb/cpc_int.h83
-rw-r--r--drivers/staging/cpc-usb/cpcusb.h86
-rw-r--r--drivers/staging/cpc-usb/sja2m16c.h41
-rw-r--r--drivers/staging/cpc-usb/sja2m16c_2.c452
-rw-r--r--drivers/staging/go7007/Makefile5
-rw-r--r--drivers/usb/Kconfig1
-rw-r--r--drivers/usb/gadget/f_loopback.c1
-rw-r--r--drivers/usb/gadget/f_obex.c1
-rw-r--r--drivers/usb/gadget/f_sourcesink.c1
-rw-r--r--drivers/usb/gadget/u_audio.c1
-rw-r--r--drivers/usb/gadget/u_ether.c1
-rw-r--r--drivers/usb/serial/sierra.c5
-rw-r--r--drivers/vlynq/vlynq.c2
-rw-r--r--fs/9p/Kconfig9
-rw-r--r--fs/9p/Makefile3
-rw-r--r--fs/9p/cache.c474
-rw-r--r--fs/9p/cache.h176
-rw-r--r--fs/9p/v9fs.c196
-rw-r--r--fs/9p/v9fs.h13
-rw-r--r--fs/9p/v9fs_vfs.h6
-rw-r--r--fs/9p/vfs_addr.c88
-rw-r--r--fs/9p/vfs_file.c25
-rw-r--r--fs/9p/vfs_inode.c61
-rw-r--r--fs/9p/vfs_super.c16
-rw-r--r--fs/adfs/inode.c7
-rw-r--r--fs/attr.c46
-rw-r--r--fs/befs/linuxvfs.c7
-rw-r--r--fs/binfmt_elf.c52
-rw-r--r--fs/binfmt_elf_fdpic.c17
-rw-r--r--fs/binfmt_flat.c22
-rw-r--r--fs/block_dev.c140
-rw-r--r--fs/btrfs/async-thread.c254
-rw-r--r--fs/btrfs/async-thread.h12
-rw-r--r--fs/btrfs/btrfs_inode.h1
-rw-r--r--fs/btrfs/compression.c8
-rw-r--r--fs/btrfs/ctree.c6
-rw-r--r--fs/btrfs/ctree.h78
-rw-r--r--fs/btrfs/dir-item.c47
-rw-r--r--fs/btrfs/disk-io.c235
-rw-r--r--fs/btrfs/export.c133
-rw-r--r--fs/btrfs/extent-tree.c1662
-rw-r--r--fs/btrfs/extent_io.c330
-rw-r--r--fs/btrfs/extent_io.h16
-rw-r--r--fs/btrfs/extent_map.c103
-rw-r--r--fs/btrfs/extent_map.h5
-rw-r--r--fs/btrfs/file.c35
-rw-r--r--fs/btrfs/free-space-cache.c36
-rw-r--r--fs/btrfs/inode-item.c4
-rw-r--r--fs/btrfs/inode-map.c93
-rw-r--r--fs/btrfs/inode.c664
-rw-r--r--fs/btrfs/ioctl.c339
-rw-r--r--fs/btrfs/ioctl.h3
-rw-r--r--fs/btrfs/ordered-data.c33
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/orphan.c20
-rw-r--r--fs/btrfs/relocation.c280
-rw-r--r--fs/btrfs/root-tree.c138
-rw-r--r--fs/btrfs/super.c1
-rw-r--r--fs/btrfs/transaction.c38
-rw-r--r--fs/btrfs/tree-log.c25
-rw-r--r--fs/btrfs/volumes.c117
-rw-r--r--fs/btrfs/volumes.h3
-rw-r--r--fs/buffer.c10
-rw-r--r--fs/char_dev.c3
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/inode.c53
-rw-r--r--fs/coda/coda_int.h1
-rw-r--r--fs/compat.c24
-rw-r--r--fs/drop_caches.c4
-rw-r--r--fs/exec.c114
-rw-r--r--fs/exofs/super.c6
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext3/inode.c3
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/fat/inode.c16
-rw-r--r--fs/fcntl.c108
-rw-r--r--fs/file_table.c6
-rw-r--r--fs/fuse/dir.c14
-rw-r--r--fs/fuse/fuse_i.h2
-rw-r--r--fs/fuse/inode.c11
-rw-r--r--fs/gfs2/aops.c3
-rw-r--r--fs/gfs2/ops_inode.c1
-rw-r--r--fs/hfs/mdb.c6
-rw-r--r--fs/hfsplus/super.c6
-rw-r--r--fs/hugetlbfs/inode.c45
-rw-r--r--fs/inode.c89
-rw-r--r--fs/internal.h1
-rw-r--r--fs/ioctl.c9
-rw-r--r--fs/isofs/inode.c8
-rw-r--r--fs/jfs/super.c9
-rw-r--r--fs/libfs.c13
-rw-r--r--fs/lockd/xdr.c1
-rw-r--r--fs/lockd/xdr4.c1
-rw-r--r--fs/namespace.c77
-rw-r--r--fs/ncpfs/inode.c12
-rw-r--r--fs/ncpfs/ioctl.c6
-rw-r--r--fs/nfs/client.c13
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/fscache.c25
-rw-r--r--fs/nfs/fscache.h6
-rw-r--r--fs/nfs/inode.c54
-rw-r--r--fs/nfs/nfs2xdr.c1
-rw-r--r--fs/nfs/nfs3proc.c1
-rw-r--r--fs/nfs/nfs3xdr.c1
-rw-r--r--fs/nfs/nfs4proc.c1
-rw-r--r--fs/nfs/nfs4xdr.c1
-rw-r--r--fs/nfs/proc.c1
-rw-r--r--fs/nfs/super.c76
-rw-r--r--fs/nfsd/nfs4idmap.c1
-rw-r--r--fs/nls/nls_base.c3
-rw-r--r--fs/ntfs/aops.c2
-rw-r--r--fs/ntfs/super.c10
-rw-r--r--fs/ocfs2/aops.c1
-rw-r--r--fs/ocfs2/dlm/dlmast.c1
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c1
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c1
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c1
-rw-r--r--fs/ocfs2/dlm/dlmlock.c1
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c1
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c1
-rw-r--r--fs/ocfs2/dlm/dlmthread.c1
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c1
-rw-r--r--fs/ocfs2/super.c1
-rw-r--r--fs/ocfs2/symlink.c1
-rw-r--r--fs/proc/meminfo.c9
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/proc/uptime.c7
-rw-r--r--fs/ramfs/file-nommu.c18
-rw-r--r--fs/read_write.c3
-rw-r--r--fs/romfs/super.c2
-rw-r--r--fs/seq_file.c74
-rw-r--r--fs/smbfs/inode.c10
-rw-r--r--fs/super.c67
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c3
-rw-r--r--include/acpi/button.h25
-rw-r--r--include/asm-generic/fcntl.h13
-rw-r--r--include/asm-generic/mman-common.h1
-rw-r--r--include/asm-generic/siginfo.h8
-rw-r--r--include/asm-generic/topology.h17
-rw-r--r--include/drm/drm_pciids.h1
-rw-r--r--include/drm/i915_drm.h19
-rw-r--r--include/linux/async_tx.h129
-rw-r--r--include/linux/binfmts.h2
-rw-r--r--include/linux/cgroup.h53
-rw-r--r--include/linux/configfs.h4
-rw-r--r--include/linux/cpumask.h709
-rw-r--r--include/linux/cred.h18
-rw-r--r--include/linux/dca.h11
-rw-r--r--include/linux/debugfs.h2
-rw-r--r--include/linux/dmaengine.h179
-rw-r--r--include/linux/fs.h11
-rw-r--r--include/linux/ftrace.h4
-rw-r--r--include/linux/futex.h10
-rw-r--r--include/linux/hugetlb.h6
-rw-r--r--include/linux/i2c/adp5588.h92
-rw-r--r--include/linux/i2c/mcs5000_ts.h24
-rw-r--r--include/linux/i8042.h30
-rw-r--r--include/linux/input.h2
-rw-r--r--include/linux/interrupt.h2
-rw-r--r--include/linux/libps2.h2
-rw-r--r--include/linux/linkage.h2
-rw-r--r--include/linux/memcontrol.h10
-rw-r--r--include/linux/mm.h22
-rw-r--r--include/linux/mm_types.h7
-rw-r--r--include/linux/mmzone.h13
-rw-r--r--include/linux/module.h17
-rw-r--r--include/linux/netlink.h1
-rw-r--r--include/linux/page-flags.h17
-rw-r--r--include/linux/page_cgroup.h13
-rw-r--r--include/linux/pci_ids.h10
-rw-r--r--include/linux/phonet.h1
-rw-r--r--include/linux/prctl.h2
-rw-r--r--include/linux/relay.h2
-rw-r--r--include/linux/res_counter.h64
-rw-r--r--include/linux/rmap.h21
-rw-r--r--include/linux/sched.h20
-rw-r--r--include/linux/security.h2
-rw-r--r--include/linux/seq_file.h38
-rw-r--r--include/linux/signal.h2
-rw-r--r--include/linux/smp.h11
-rw-r--r--include/linux/sunrpc/xdr.h5
-rw-r--r--include/linux/swap.h41
-rw-r--r--include/linux/swapops.h38
-rw-r--r--include/linux/sysctl.h19
-rw-r--r--include/linux/time.h28
-rw-r--r--include/linux/topology.h6
-rw-r--r--include/linux/tracehook.h34
-rw-r--r--include/linux/tracepoint.h2
-rw-r--r--include/linux/unaligned/be_byteshift.h2
-rw-r--r--include/linux/unaligned/le_byteshift.h2
-rw-r--r--include/linux/usb/usbnet.h1
-rw-r--r--include/linux/utsname.h1
-rw-r--r--include/linux/vgaarb.h3
-rw-r--r--include/linux/writeback.h11
-rw-r--r--include/net/9p/9p.h3
-rw-r--r--include/net/ip.h2
-rw-r--r--include/net/ipip.h1
-rw-r--r--include/net/ndisc.h2
-rw-r--r--init/Kconfig8
-rw-r--r--init/main.c6
-rw-r--r--ipc/ipc_sysctl.c16
-rw-r--r--ipc/mq_sysctl.c8
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/audit.c18
-rw-r--r--kernel/audit_watch.c2
-rw-r--r--kernel/auditsc.c6
-rw-r--r--kernel/cgroup.c1107
-rw-r--r--kernel/cgroup_debug.c105
-rw-r--r--kernel/cgroup_freezer.c15
-rw-r--r--kernel/cpuset.c66
-rw-r--r--kernel/cred.c19
-rw-r--r--kernel/exit.c146
-rw-r--r--kernel/fork.c34
-rw-r--r--kernel/gcov/Kconfig2
-rw-r--r--kernel/hung_task.c4
-rw-r--r--kernel/kmod.c13
-rw-r--r--kernel/module.c159
-rw-r--r--kernel/ns_cgroup.c16
-rw-r--r--kernel/params.c7
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/power/swap.c1
-rw-r--r--kernel/ptrace.c11
-rw-r--r--kernel/res_counter.c21
-rw-r--r--kernel/sched.c39
-rw-r--r--kernel/sched_fair.c4
-rw-r--r--kernel/signal.c168
-rw-r--r--kernel/slow-work.c12
-rw-r--r--kernel/smp.c7
-rw-r--r--kernel/softlockup.c4
-rw-r--r--kernel/sys.c22
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/sysctl.c113
-rw-r--r--kernel/time/Makefile2
-rw-r--r--kernel/time/timeconv.c127
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/trace.c7
-rw-r--r--kernel/trace/trace_stack.c4
-rw-r--r--kernel/uid16.c1
-rw-r--r--kernel/utsname_sysctl.c4
-rw-r--r--lib/Kconfig.debug8
-rw-r--r--lib/decompress_inflate.c8
-rw-r--r--lib/decompress_unlzma.c10
-rw-r--r--lib/vsprintf.c25
-rw-r--r--mm/Kconfig14
-rw-r--r--mm/Makefile2
-rw-r--r--mm/filemap.c6
-rw-r--r--mm/hugetlb.c12
-rw-r--r--mm/hwpoison-inject.c41
-rw-r--r--mm/ksm.c14
-rw-r--r--mm/madvise.c30
-rw-r--r--mm/memcontrol.c737
-rw-r--r--mm/memory-failure.c832
-rw-r--r--mm/memory.c86
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/mremap.c4
-rw-r--r--mm/nommu.c40
-rw-r--r--mm/page-writeback.c27
-rw-r--r--mm/page_alloc.c44
-rw-r--r--mm/quicklist.c3
-rw-r--r--mm/rmap.c60
-rw-r--r--mm/shmem.c5
-rw-r--r--mm/swapfile.c4
-rw-r--r--mm/truncate.c136
-rw-r--r--mm/vmscan.c51
-rw-r--r--net/ax25/af_ax25.c4
-rw-r--r--net/bridge/br_netfilter.c4
-rw-r--r--net/core/pktgen.c160
-rw-r--r--net/decnet/dn_dev.c5
-rw-r--r--net/decnet/sysctl_net_decnet.c2
-rw-r--r--net/ipv4/devinet.c12
-rw-r--r--net/ipv4/ip_gre.c13
-rw-r--r--net/ipv4/ip_sockglue.c3
-rw-r--r--net/ipv4/ipip.c8
-rw-r--r--net/ipv4/route.c7
-rw-r--r--net/ipv4/sysctl_net_ipv4.c16
-rw-r--r--net/ipv6/addrconf.c8
-rw-r--r--net/ipv6/ip6_tunnel.c7
-rw-r--r--net/ipv6/ndisc.c8
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/ipv6/sit.c8
-rw-r--r--net/irda/irsysctl.c8
-rw-r--r--net/mac80211/scan.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c8
-rw-r--r--net/netfilter/nf_log.c4
-rw-r--r--net/netlink/af_netlink.c19
-rw-r--r--net/netlink/genetlink.c4
-rw-r--r--net/phonet/af_phonet.c6
-rw-r--r--net/phonet/socket.c16
-rw-r--r--net/phonet/sysctl.c4
-rw-r--r--net/sunrpc/auth_null.c1
-rw-r--r--net/sunrpc/clnt.c5
-rw-r--r--net/sunrpc/rpc_pipe.c3
-rw-r--r--net/sunrpc/sysctl.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c2
-rw-r--r--net/sunrpc/xprtsock.c9
-rw-r--r--net/wireless/wext-sme.c2
-rw-r--r--scripts/Kbuild.include16
-rw-r--r--scripts/Makefile.build6
-rw-r--r--scripts/basic/docproc.c34
-rw-r--r--scripts/basic/fixdep.c26
-rw-r--r--scripts/basic/hash.c4
-rwxr-xr-xscripts/checkincludes.pl71
-rw-r--r--scripts/kconfig/conf.c24
-rw-r--r--scripts/kconfig/confdata.c2
-rw-r--r--scripts/kconfig/expr.c6
-rw-r--r--scripts/kconfig/gconf.c21
-rw-r--r--scripts/kconfig/gconf.glade4
-rw-r--r--scripts/kconfig/kxgettext.c4
-rw-r--r--scripts/kconfig/lkc_proto.h2
-rw-r--r--scripts/kconfig/mconf.c78
-rw-r--r--scripts/kconfig/menu.c84
-rw-r--r--scripts/kconfig/qconf.cc10
-rw-r--r--scripts/kconfig/symbol.c6
-rw-r--r--scripts/markup_oops.pl5
-rwxr-xr-xscripts/tags.sh3
-rw-r--r--security/device_cgroup.c3
-rw-r--r--security/keys/gc.c4
-rw-r--r--security/lsm_audit.c2
-rw-r--r--security/min_addr.c4
-rw-r--r--security/selinux/avc.c19
-rw-r--r--security/selinux/hooks.c2
-rw-r--r--usr/.gitignore2
-rw-r--r--usr/Makefile2
-rw-r--r--virt/kvm/kvm_main.c3
768 files changed, 28193 insertions, 14141 deletions
diff --git a/Documentation/auxdisplay/cfag12864b-example.c b/Documentation/auxdisplay/cfag12864b-example.c
index 1d2c010bae1..e7823ffb1ca 100644
--- a/Documentation/auxdisplay/cfag12864b-example.c
+++ b/Documentation/auxdisplay/cfag12864b-example.c
@@ -194,7 +194,6 @@ static void cfag12864b_blit(void)
*/
#include <stdio.h>
-#include <string.h>
#define EXAMPLES 6
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 6eb1a97e88c..455d4e6d346 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -408,6 +408,26 @@ You can attach the current shell task by echoing 0:
# echo 0 > tasks
+2.3 Mounting hierarchies by name
+--------------------------------
+
+Passing the name=<x> option when mounting a cgroups hierarchy
+associates the given name with the hierarchy. This can be used when
+mounting a pre-existing hierarchy, in order to refer to it by name
+rather than by its set of active subsystems. Each hierarchy is either
+nameless, or has a unique name.
+
+The name should match [\w.-]+
+
+When passing a name=<x> option for a new hierarchy, you need to
+specify subsystems manually; the legacy behaviour of mounting all
+subsystems when none are explicitly specified is not supported when
+you give a hierarchy a name.
+
+The name of the hierarchy appears as part of the hierarchy description
+in /proc/mounts and /proc/<pid>/cgroups.
+
+
3. Kernel API
=============
@@ -501,7 +521,7 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be
called multiple times against a cgroup.
int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct task_struct *task)
+ struct task_struct *task, bool threadgroup)
(cgroup_mutex held by caller)
Called prior to moving a task into a cgroup; if the subsystem
@@ -509,14 +529,20 @@ returns an error, this will abort the attach operation. If a NULL
task is passed, then a successful result indicates that *any*
unspecified task can be moved into the cgroup. Note that this isn't
called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex.
+remain valid while the caller holds cgroup_mutex. If threadgroup is
+true, then a successful result indicates that all threads in the given
+thread's threadgroup can be moved together.
void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cgrp, struct task_struct *task)
+ struct cgroup *old_cgrp, struct task_struct *task,
+ bool threadgroup)
(cgroup_mutex held by caller)
Called after the task has been attached to the cgroup, to allow any
post-attachment activity that requires memory allocations or blocking.
+If threadgroup is true, the subsystem should take care of all threads
+in the specified thread's threadgroup. Currently does not support any
+subsystem that might need the old_cgrp for every thread in the group.
void fork(struct cgroup_subsy *ss, struct task_struct *task)
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 23d1262c077..b871f2552b4 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -179,6 +179,9 @@ The reclaim algorithm has not been modified for cgroups, except that
pages that are selected for reclaiming come from the per cgroup LRU
list.
+NOTE: Reclaim does not work for the root cgroup, since we cannot set any
+limits on the root cgroup.
+
2. Locking
The memory controller uses the following hierarchy
@@ -210,6 +213,7 @@ We can alter the memory limit:
NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
mega or gigabytes.
NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
+NOTE: We cannot set limits on the root cgroup any more.
# cat /cgroups/0/memory.limit_in_bytes
4194304
@@ -375,7 +379,42 @@ cgroups created below it.
NOTE2: This feature can be enabled/disabled per subtree.
-7. TODO
+7. Soft limits
+
+Soft limits allow for greater sharing of memory. The idea behind soft limits
+is to allow control groups to use as much of the memory as needed, provided
+
+a. There is no memory contention
+b. They do not exceed their hard limit
+
+When the system detects memory contention or low memory, control groups
+are pushed back to their soft limits. If the soft limit of each control
+group is very high, they are pushed back as much as possible to make
+sure that one control group does not starve the others of memory.
+
+Please note that soft limits is a best effort feature, it comes with
+no guarantees, but it does its best to make sure that when memory is
+heavily contended for, memory is allocated based on the soft limit
+hints/setup. Currently soft limit based reclaim is set up such that
+it gets invoked from balance_pgdat (kswapd).
+
+7.1 Interface
+
+Soft limits can be set up by using the following commands (in this example we
+assume a soft limit of 256 megabytes)
+
+# echo 256M > memory.soft_limit_in_bytes
+
+If we want to change this to 1G, we can at any time use
+
+# echo 1G > memory.soft_limit_in_bytes
+
+NOTE1: Soft limits take effect over a long period of time, since they involve
+ reclaiming memory for balancing between memory cgroups
+NOTE2: It is recommended to set the soft limit always below the hard limit,
+ otherwise the hard limit will take precedence.
+
+8. TODO
1. Add support for accounting huge pages (as a separate controller)
2. Make per-cgroup scanner reclaim not-shared pages first
diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
index 9f59fcbf5d8..ba046b8fa92 100644
--- a/Documentation/crypto/async-tx-api.txt
+++ b/Documentation/crypto/async-tx-api.txt
@@ -54,20 +54,23 @@ features surfaced as a result:
3.1 General format of the API:
struct dma_async_tx_descriptor *
-async_<operation>(<op specific parameters>,
- enum async_tx_flags flags,
- struct dma_async_tx_descriptor *dependency,
- dma_async_tx_callback callback_routine,
- void *callback_parameter);
+async_<operation>(<op specific parameters>, struct async_submit_ctl *submit)
3.2 Supported operations:
-memcpy - memory copy between a source and a destination buffer
-memset - fill a destination buffer with a byte value
-xor - xor a series of source buffers and write the result to a
- destination buffer
-xor_zero_sum - xor a series of source buffers and set a flag if the
- result is zero. The implementation attempts to prevent
- writes to memory
+memcpy - memory copy between a source and a destination buffer
+memset - fill a destination buffer with a byte value
+xor - xor a series of source buffers and write the result to a
+ destination buffer
+xor_val - xor a series of source buffers and set a flag if the
+ result is zero. The implementation attempts to prevent
+ writes to memory
+pq - generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val - validate that a p and/or q buffer are in sync with a given series of
+ sources
+datap - (raid6_datap_recov) recover a raid6 data block and the p block
+ from the given sources
+2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given
+ sources
3.3 Descriptor management:
The return value is non-NULL and points to a 'descriptor' when the operation
@@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to
recycle (or free) the descriptor. A descriptor can be acked by one of the
following methods:
1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
-2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
- descriptor of a new operation.
+2/ submitting an unacknowledged descriptor as a dependency to another
+ async_tx call will implicitly set the acknowledged state.
3/ calling async_tx_ack() on the descriptor.
3.4 When does the operation execute?
@@ -119,30 +122,42 @@ of an operation.
Perform a xor->copy->xor operation where each operation depends on the
result from the previous operation:
-void complete_xor_copy_xor(void *param)
+void callback(void *param)
{
- printk("complete\n");
+ struct completion *cmp = param;
+
+ complete(cmp);
}
-int run_xor_copy_xor(struct page **xor_srcs,
- int xor_src_cnt,
- struct page *xor_dest,
- size_t xor_len,
- struct page *copy_src,
- struct page *copy_dest,
- size_t copy_len)
+void run_xor_copy_xor(struct page **xor_srcs,
+ int xor_src_cnt,
+ struct page *xor_dest,
+ size_t xor_len,
+ struct page *copy_src,
+ struct page *copy_dest,
+ size_t copy_len)
{
struct dma_async_tx_descriptor *tx;
+ addr_conv_t addr_conv[xor_src_cnt];
+ struct async_submit_ctl submit;
+ addr_conv_t addr_conv[NDISKS];
+ struct completion cmp;
+
+ init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
+ addr_conv);
+ tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
- tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
- ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
- tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
- ASYNC_TX_DEP_ACK, tx, NULL, NULL);
- tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
- ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
- tx, complete_xor_copy_xor, NULL);
+ submit.depend_tx = tx;
+ tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
+
+ init_completion(&cmp);
+ init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
+ callback, &cmp, addr_conv);
+ tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
async_tx_issue_pending_all();
+
+ wait_for_completion(&cmp);
}
See include/linux/async_tx.h for more information on the flags. See the
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index fa75220f8d3..89a47b5aff0 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -354,14 +354,6 @@ Who: Krzysztof Piotr Oledzki <ole@ans.pl>
---------------------------
-What: fscher and fscpos drivers
-When: June 2009
-Why: Deprecated by the new fschmd driver.
-Who: Hans de Goede <hdegoede@redhat.com>
- Jean Delvare <khali@linux-fr.org>
-
----------------------------
-
What: sysfs ui for changing p4-clockmod parameters
When: September 2009
Why: See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and
diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt
index 6208f55c44c..57e0b80a527 100644
--- a/Documentation/filesystems/9p.txt
+++ b/Documentation/filesystems/9p.txt
@@ -18,11 +18,11 @@ the 9p client is available in the form of a USENIX paper:
Other applications are described in the following papers:
* XCPU & Clustering
- http://www.xcpu.org/xcpu-talk.pdf
+ http://xcpu.org/papers/xcpu-talk.pdf
* KVMFS: control file system for KVM
- http://www.xcpu.org/kvmfs.pdf
- * CellFS: A New ProgrammingModel for the Cell BE
- http://www.xcpu.org/cellfs-talk.pdf
+ http://xcpu.org/papers/kvmfs.pdf
+ * CellFS: A New Programming Model for the Cell BE
+ http://xcpu.org/papers/cellfs-talk.pdf
* PROSE I/O: Using 9p to enable Application Partitions
http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
@@ -48,6 +48,7 @@ OPTIONS
(see rfdno and wfdno)
virtio - connect to the next virtio channel available
(from lguest or KVM with trans_virtio module)
+ rdma - connect to a specified RDMA channel
uname=name user name to attempt mount as on the remote server. The
server may override or ignore this value. Certain user
@@ -59,16 +60,22 @@ OPTIONS
cache=mode specifies a caching policy. By default, no caches are used.
loose = no attempts are made at consistency,
intended for exclusive, read-only mounts
+ fscache = use FS-Cache for a persistent, read-only
+ cache backend.
debug=n specifies debug level. The debug level is a bitmask.
- 0x01 = display verbose error messages
- 0x02 = developer debug (DEBUG_CURRENT)
- 0x04 = display 9p trace
- 0x08 = display VFS trace
- 0x10 = display Marshalling debug
- 0x20 = display RPC debug
- 0x40 = display transport debug
- 0x80 = display allocation debug
+ 0x01 = display verbose error messages
+ 0x02 = developer debug (DEBUG_CURRENT)
+ 0x04 = display 9p trace
+ 0x08 = display VFS trace
+ 0x10 = display Marshalling debug
+ 0x20 = display RPC debug
+ 0x40 = display transport debug
+ 0x80 = display allocation debug
+ 0x100 = display protocol message debug
+ 0x200 = display Fid debug
+ 0x400 = display packet debug
+ 0x800 = display fscache tracing debug
rfdno=n the file descriptor for reading with trans=fd
@@ -100,6 +107,10 @@ OPTIONS
any = v9fs does single attach and performs all
operations as one user
+ cachetag cache tag to use the specified persistent cache.
+ cache tags for existing cache sessions can be listed at
+ /sys/fs/9p/caches. (applies only to cache=fscache)
+
RESOURCES
=========
@@ -118,7 +129,7 @@ and export.
A Linux version of the 9p server is now maintained under the npfs project
on sourceforge (http://sourceforge.net/projects/npfs). The currently
maintained version is the single-threaded version of the server (named spfs)
-available from the same CVS repository.
+available from the same SVN repository.
There are user and developer mailing lists available through the v9fs project
on sourceforge (http://sourceforge.net/projects/v9fs).
@@ -126,7 +137,8 @@ on sourceforge (http://sourceforge.net/projects/v9fs).
A stand-alone version of the module (which should build for any 2.6 kernel)
is available via (http://github.com/ericvh/9p-sac/tree/master)
-News and other information is maintained on SWiK (http://swik.net/v9fs).
+News and other information is maintained on SWiK (http://swik.net/v9fs)
+and the Wiki (http://sf.net/apps/mediawiki/v9fs/index.php).
Bug reports may be issued through the kernel.org bugzilla
(http://bugzilla.kernel.org)
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt
index 736540045dc..23a181074f9 100644
--- a/Documentation/filesystems/sharedsubtree.txt
+++ b/Documentation/filesystems/sharedsubtree.txt
@@ -4,7 +4,7 @@ Shared Subtrees
Contents:
1) Overview
2) Features
- 3) smount command
+ 3) Setting mount states
4) Use-case
5) Detailed semantics
6) Quiz
@@ -41,14 +41,14 @@ replicas continue to be exactly same.
Here is an example:
- Lets say /mnt has a mount that is shared.
+ Let's say /mnt has a mount that is shared.
mount --make-shared /mnt
- note: mount command does not yet support the --make-shared flag.
- I have included a small C program which does the same by executing
- 'smount /mnt shared'
+ Note: mount(8) command now supports the --make-shared flag,
+ so the sample 'smount' program is no longer needed and has been
+ removed.
- #mount --bind /mnt /tmp
+ # mount --bind /mnt /tmp
The above command replicates the mount at /mnt to the mountpoint /tmp
and the contents of both the mounts remain identical.
@@ -58,8 +58,8 @@ replicas continue to be exactly same.
#ls /tmp
a b c
- Now lets say we mount a device at /tmp/a
- #mount /dev/sd0 /tmp/a
+ Now let's say we mount a device at /tmp/a
+ # mount /dev/sd0 /tmp/a
#ls /tmp/a
t1 t2 t2
@@ -80,21 +80,20 @@ replicas continue to be exactly same.
Here is an example:
- Lets say /mnt has a mount which is shared.
- #mount --make-shared /mnt
+ Let's say /mnt has a mount which is shared.
+ # mount --make-shared /mnt
- Lets bind mount /mnt to /tmp
- #mount --bind /mnt /tmp
+ Let's bind mount /mnt to /tmp
+ # mount --bind /mnt /tmp
the new mount at /tmp becomes a shared mount and it is a replica of
the mount at /mnt.
- Now lets make the mount at /tmp; a slave of /mnt
- #mount --make-slave /tmp
- [or smount /tmp slave]
+ Now let's make the mount at /tmp a slave of /mnt
+ # mount --make-slave /tmp
- lets mount /dev/sd0 on /mnt/a
- #mount /dev/sd0 /mnt/a
+ let's mount /dev/sd0 on /mnt/a
+ # mount /dev/sd0 /mnt/a
#ls /mnt/a
t1 t2 t3
@@ -104,9 +103,9 @@ replicas continue to be exactly same.
Note the mount event has propagated to the mount at /tmp
- However lets see what happens if we mount something on the mount at /tmp
+ However let's see what happens if we mount something on the mount at /tmp
- #mount /dev/sd1 /tmp/b
+ # mount /dev/sd1 /tmp/b
#ls /tmp/b
s1 s2 s3
@@ -124,12 +123,11 @@ replicas continue to be exactly same.
2d) An unbindable mount is an unbindable private mount
- lets say we have a mount at /mnt and we make is unbindable
+ let's say we have a mount at /mnt and we make it unbindable
- #mount --make-unbindable /mnt
- [ smount /mnt unbindable ]
+ # mount --make-unbindable /mnt
- Lets try to bind mount this mount somewhere else.
+ Let's try to bind mount this mount somewhere else.
# mount --bind /mnt /tmp
mount: wrong fs type, bad option, bad superblock on /mnt,
or too many mounted file systems
@@ -137,149 +135,15 @@ replicas continue to be exactly same.
Binding an unbindable mount is an invalid operation.
-3) smount command
+3) Setting mount states
- Currently the mount command is not aware of shared subtree features.
- Work is in progress to add the support in mount ( util-linux package ).
- Till then use the following program.
+ The mount command (util-linux package) can be used to set mount
+ states:
- ------------------------------------------------------------------------
- //
- //this code was developed my Miklos Szeredi <miklos@szeredi.hu>
- //and modified by Ram Pai <linuxram@us.ibm.com>
- // sample usage:
- // smount /tmp shared
- //
- #include <stdio.h>
- #include <stdlib.h>
- #include <unistd.h>
- #include <string.h>
- #include <sys/mount.h>
- #include <sys/fsuid.h>
-
- #ifndef MS_REC
- #define MS_REC 0x4000 /* 16384: Recursive loopback */
- #endif
-
- #ifndef MS_SHARED
- #define MS_SHARED 1<<20 /* Shared */
- #endif
-
- #ifndef MS_PRIVATE
- #define MS_PRIVATE 1<<18 /* Private */
- #endif
-
- #ifndef MS_SLAVE
- #define MS_SLAVE 1<<19 /* Slave */
- #endif
-
- #ifndef MS_UNBINDABLE
- #define MS_UNBINDABLE 1<<17 /* Unbindable */
- #endif
-
- int main(int argc, char *argv[])
- {
- int type;
- if(argc != 3) {
- fprintf(stderr, "usage: %s dir "
- "<rshared|rslave|rprivate|runbindable|shared|slave"
- "|private|unbindable>\n" , argv[0]);
- return 1;
- }
-
- fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]);
-
- if (strcmp(argv[2],"rshared")==0)
- type=(MS_SHARED|MS_REC);
- else if (strcmp(argv[2],"rslave")==0)
- type=(MS_SLAVE|MS_REC);
- else if (strcmp(argv[2],"rprivate")==0)
- type=(MS_PRIVATE|MS_REC);
- else if (strcmp(argv[2],"runbindable")==0)
- type=(MS_UNBINDABLE|MS_REC);
- else if (strcmp(argv[2],"shared")==0)
- type=MS_SHARED;
- else if (strcmp(argv[2],"slave")==0)
- type=MS_SLAVE;
- else if (strcmp(argv[2],"private")==0)
- type=MS_PRIVATE;
- else if (strcmp(argv[2],"unbindable")==0)
- type=MS_UNBINDABLE;
- else {
- fprintf(stderr, "invalid operation: %s\n", argv[2]);
- return 1;
- }
- setfsuid(getuid());
-
- if(mount("", argv[1], "dontcare", type, "") == -1) {
- perror("mount");
- return 1;
- }
- return 0;
- }
- -----------------------------------------------------------------------
-
- Copy the above code snippet into smount.c
- gcc -o smount smount.c
-
-
- (i) To mark all the mounts under /mnt as shared execute the following
- command:
-
- smount /mnt rshared
- the corresponding syntax planned for mount command is
- mount --make-rshared /mnt
-
- just to mark a mount /mnt as shared, execute the following
- command:
- smount /mnt shared
- the corresponding syntax planned for mount command is
- mount --make-shared /mnt
-
- (ii) To mark all the shared mounts under /mnt as slave execute the
- following
-
- command:
- smount /mnt rslave
- the corresponding syntax planned for mount command is
- mount --make-rslave /mnt
-
- just to mark a mount /mnt as slave, execute the following
- command:
- smount /mnt slave
- the corresponding syntax planned for mount command is
- mount --make-slave /mnt
-
- (iii) To mark all the mounts under /mnt as private execute the
- following command:
-
- smount /mnt rprivate
- the corresponding syntax planned for mount command is
- mount --make-rprivate /mnt
-
- just to mark a mount /mnt as private, execute the following
- command:
- smount /mnt private
- the corresponding syntax planned for mount command is
- mount --make-private /mnt
-
- NOTE: by default all the mounts are created as private. But if
- you want to change some shared/slave/unbindable mount as
- private at a later point in time, this command can help.
-
- (iv) To mark all the mounts under /mnt as unbindable execute the
- following
-
- command:
- smount /mnt runbindable
- the corresponding syntax planned for mount command is
- mount --make-runbindable /mnt
-
- just to mark a mount /mnt as unbindable, execute the following
- command:
- smount /mnt unbindable
- the corresponding syntax planned for mount command is
- mount --make-unbindable /mnt
+ mount --make-shared mountpoint
+ mount --make-slave mountpoint
+ mount --make-private mountpoint
+ mount --make-unbindable mountpoint
4) Use cases
@@ -350,7 +214,7 @@ replicas continue to be exactly same.
mount --rbind / /view/v3
mount --rbind / /view/v4
- and if /usr has a versioning filesystem mounted, than that
+ and if /usr has a versioning filesystem mounted, then that
mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and
/view/v4/usr too
@@ -390,7 +254,7 @@ replicas continue to be exactly same.
For example:
mount --make-shared /mnt
- mount --bin /mnt /tmp
+ mount --bind /mnt /tmp
The mount at /mnt and that at /tmp are both shared and belong
to the same peer group. Anything mounted or unmounted under
@@ -558,7 +422,7 @@ replicas continue to be exactly same.
then the subtree under the unbindable mount is pruned in the new
location.
- eg: lets say we have the following mount tree.
+ eg: let's say we have the following mount tree.
A
/ \
@@ -566,7 +430,7 @@ replicas continue to be exactly same.
/ \ / \
D E F G
- Lets say all the mount except the mount C in the tree are
+ Let's say all the mounts except the mount C in the tree are
of a type other than unbindable.
If this tree is rbound to say Z
@@ -683,13 +547,13 @@ replicas continue to be exactly same.
'b' on mounts that receive propagation from mount 'B' and does not have
sub-mounts within them are unmounted.
- Example: Lets say 'B1', 'B2', 'B3' are shared mounts that propagate to
+ Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to
each other.
- lets say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
+ let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
'B1', 'B2' and 'B3' respectively.
- lets say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
+ let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
mount 'B1', 'B2' and 'B3' respectively.
if 'C1' is unmounted, all the mounts that are most-recently-mounted on
@@ -710,7 +574,7 @@ replicas continue to be exactly same.
A cloned namespace contains all the mounts as that of the parent
namespace.
- Lets say 'A' and 'B' are the corresponding mounts in the parent and the
+ Let's say 'A' and 'B' are the corresponding mounts in the parent and the
child namespace.
If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to
@@ -759,11 +623,11 @@ replicas continue to be exactly same.
mount --make-slave /mnt
At this point we have the first mount at /tmp and
- its root dentry is 1. Lets call this mount 'A'
+ its root dentry is 1. Let's call this mount 'A'
And then we have a second mount at /tmp1 with root
- dentry 2. Lets call this mount 'B'
+ dentry 2. Let's call this mount 'B'
Next we have a third mount at /mnt with root dentry
- mnt. Lets call this mount 'C'
+ mnt. Let's call this mount 'C'
'B' is the slave of 'A' and 'C' is a slave of 'B'
A -> B -> C
@@ -794,7 +658,7 @@ replicas continue to be exactly same.
Q3 Why is unbindable mount needed?
- Lets say we want to replicate the mount tree at multiple
+ Let's say we want to replicate the mount tree at multiple
locations within the same subtree.
if one rbind mounts a tree within the same subtree 'n' times
@@ -803,7 +667,7 @@ replicas continue to be exactly same.
mounts. Here is an example.
step 1:
- lets say the root tree has just two directories with
+ let's say the root tree has just two directories with
one vfsmount.
root
/ \
@@ -875,7 +739,7 @@ replicas continue to be exactly same.
Unclonable mounts come in handy here.
step 1:
- lets say the root tree has just two directories with
+ let's say the root tree has just two directories with
one vfsmount.
root
/ \
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index f49eecf2e57..623f094c9d8 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -536,6 +536,7 @@ struct address_space_operations {
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
int (*launder_page) (struct page *);
+ int (*error_remove_page) (struct address_space *mapping, struct page *page);
};
writepage: called by the VM to write a dirty page to backing store.
@@ -694,6 +695,12 @@ struct address_space_operations {
prevent redirtying the page, it is kept locked during the whole
operation.
+ error_remove_page: normally set to generic_error_remove_page if truncation
+ is ok for this address space. Used for memory failure handling.
+ Setting this implies you deal with pages going away under you,
+ unless you have them locked or reference counts increased.
+
+
The File Object
===============
diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp
index dbbe6c7025b..92267b62db5 100644
--- a/Documentation/hwmon/coretemp
+++ b/Documentation/hwmon/coretemp
@@ -4,7 +4,9 @@ Kernel driver coretemp
Supported chips:
* All Intel Core family
Prefix: 'coretemp'
- CPUID: family 0x6, models 0xe, 0xf, 0x16, 0x17
+ CPUID: family 0x6, models 0xe (Pentium M DC), 0xf (Core 2 DC 65nm),
+ 0x16 (Core 2 SC 65nm), 0x17 (Penryn 45nm),
+ 0x1a (Nehalem), 0x1c (Atom), 0x1e (Lynnfield)
Datasheet: Intel 64 and IA-32 Architectures Software Developer's Manual
Volume 3A: System Programming Guide
http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
diff --git a/Documentation/hwmon/fscher b/Documentation/hwmon/fscher
deleted file mode 100644
index 64031659aff..00000000000
--- a/Documentation/hwmon/fscher
+++ /dev/null
@@ -1,169 +0,0 @@
-Kernel driver fscher
-====================
-
-Supported chips:
- * Fujitsu-Siemens Hermes chip
- Prefix: 'fscher'
- Addresses scanned: I2C 0x73
-
-Authors:
- Reinhard Nissl <rnissl@gmx.de> based on work
- from Hermann Jung <hej@odn.de>,
- Frodo Looijaard <frodol@dds.nl>,
- Philip Edelbrock <phil@netroedge.com>
-
-Description
------------
-
-This driver implements support for the Fujitsu-Siemens Hermes chip. It is
-described in the 'Register Set Specification BMC Hermes based Systemboard'
-from Fujitsu-Siemens.
-
-The Hermes chip implements a hardware-based system management, e.g. for
-controlling fan speed and core voltage. There is also a watchdog counter on
-the chip which can trigger an alarm and even shut the system down.
-
-The chip provides three temperature values (CPU, motherboard and
-auxiliary), three voltage values (+12V, +5V and battery) and three fans
-(power supply, CPU and auxiliary).
-
-Temperatures are measured in degrees Celsius. The resolution is 1 degree.
-
-Fan rotation speeds are reported in RPM (rotations per minute). The value
-can be divided by a programmable divider (1, 2 or 4) which is stored on
-the chip.
-
-Voltage sensors (also known as "in" sensors) report their values in volts.
-
-All values are reported as final values from the driver. There is no need
-for further calculations.
-
-
-Detailed description
---------------------
-
-Below you'll find a single line description of all the bit values. With
-this information, you're able to decode e. g. alarms, wdog, etc. To make
-use of the watchdog, you'll need to set the watchdog time and enable the
-watchdog. After that it is necessary to restart the watchdog time within
-the specified period of time, or a system reset will occur.
-
-* revision
- READING & 0xff = 0x??: HERMES revision identification
-
-* alarms
- READING & 0x80 = 0x80: CPU throttling active
- READING & 0x80 = 0x00: CPU running at full speed
-
- READING & 0x10 = 0x10: software event (see control:1)
- READING & 0x10 = 0x00: no software event
-
- READING & 0x08 = 0x08: watchdog event (see wdog:2)
- READING & 0x08 = 0x00: no watchdog event
-
- READING & 0x02 = 0x02: thermal event (see temp*:1)
- READING & 0x02 = 0x00: no thermal event
-
- READING & 0x01 = 0x01: fan event (see fan*:1)
- READING & 0x01 = 0x00: no fan event
-
- READING & 0x13 ! 0x00: ALERT LED is flashing
-
-* control
- READING & 0x01 = 0x01: software event
- READING & 0x01 = 0x00: no software event
-
- WRITING & 0x01 = 0x01: set software event
- WRITING & 0x01 = 0x00: clear software event
-
-* watchdog_control
- READING & 0x80 = 0x80: power off on watchdog event while thermal event
- READING & 0x80 = 0x00: watchdog power off disabled (just system reset enabled)
-
- READING & 0x40 = 0x40: watchdog timebase 60 seconds (see also wdog:1)
- READING & 0x40 = 0x00: watchdog timebase 2 seconds
-
- READING & 0x10 = 0x10: watchdog enabled
- READING & 0x10 = 0x00: watchdog disabled
-
- WRITING & 0x80 = 0x80: enable "power off on watchdog event while thermal event"
- WRITING & 0x80 = 0x00: disable "power off on watchdog event while thermal event"
-
- WRITING & 0x40 = 0x40: set watchdog timebase to 60 seconds
- WRITING & 0x40 = 0x00: set watchdog timebase to 2 seconds
-
- WRITING & 0x20 = 0x20: disable watchdog
-
- WRITING & 0x10 = 0x10: enable watchdog / restart watchdog time
-
-* watchdog_state
- READING & 0x02 = 0x02: watchdog system reset occurred
- READING & 0x02 = 0x00: no watchdog system reset occurred
-
- WRITING & 0x02 = 0x02: clear watchdog event
-
-* watchdog_preset
- READING & 0xff = 0x??: configured watch dog time in units (see wdog:3 0x40)
-
- WRITING & 0xff = 0x??: configure watch dog time in units
-
-* in* (0: +5V, 1: +12V, 2: onboard 3V battery)
- READING: actual voltage value
-
-* temp*_status (1: CPU sensor, 2: onboard sensor, 3: auxiliary sensor)
- READING & 0x02 = 0x02: thermal event (overtemperature)
- READING & 0x02 = 0x00: no thermal event
-
- READING & 0x01 = 0x01: sensor is working
- READING & 0x01 = 0x00: sensor is faulty
-
- WRITING & 0x02 = 0x02: clear thermal event
-
-* temp*_input (1: CPU sensor, 2: onboard sensor, 3: auxiliary sensor)
- READING: actual temperature value
-
-* fan*_status (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
- READING & 0x04 = 0x04: fan event (fan fault)
- READING & 0x04 = 0x00: no fan event
-
- WRITING & 0x04 = 0x04: clear fan event
-
-* fan*_div (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
- Divisors 2,4 and 8 are supported, both for reading and writing
-
-* fan*_pwm (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
- READING & 0xff = 0x00: fan may be switched off
- READING & 0xff = 0x01: fan must run at least at minimum speed (supply: 6V)
- READING & 0xff = 0xff: fan must run at maximum speed (supply: 12V)
- READING & 0xff = 0x??: fan must run at least at given speed (supply: 6V..12V)
-
- WRITING & 0xff = 0x00: fan may be switched off
- WRITING & 0xff = 0x01: fan must run at least at minimum speed (supply: 6V)
- WRITING & 0xff = 0xff: fan must run at maximum speed (supply: 12V)
- WRITING & 0xff = 0x??: fan must run at least at given speed (supply: 6V..12V)
-
-* fan*_input (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
- READING: actual RPM value
-
-
-Limitations
------------
-
-* Measuring fan speed
-It seems that the chip counts "ripples" (typical fans produce 2 ripples per
-rotation while VERAX fans produce 18) in a 9-bit register. This register is
-read out every second, then the ripple prescaler (2, 4 or 8) is applied and
-the result is stored in the 8 bit output register. Due to the limitation of
-the counting register to 9 bits, it is impossible to measure a VERAX fan
-properly (even with a prescaler of 8). At its maximum speed of 3500 RPM the
-fan produces 1080 ripples per second which causes the counting register to
-overflow twice, leading to only 186 RPM.
-
-* Measuring input voltages
-in2 ("battery") reports the voltage of the onboard lithium battery and not
-+3.3V from the power supply.
-
-* Undocumented features
-Fujitsu-Siemens Computers has not documented all features of the chip so
-far. Their software, System Guard, shows that there are a still some
-features which cannot be controlled by this implementation.
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index aafca0a8f66..947374977ca 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -135,6 +135,7 @@ Code Seq# Include File Comments
<http://mikonos.dia.unisa.it/tcfs>
'l' 40-7F linux/udf_fs_i.h in development:
<http://sourceforge.net/projects/linux-udf/>
+'m' 00-09 linux/mmtimer.h
'm' all linux/mtio.h conflict!
'm' all linux/soundcard.h conflict!
'm' all linux/synclink.h conflict!
diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
index f3355b6812d..bb3bf38f03d 100644
--- a/Documentation/kbuild/kbuild.txt
+++ b/Documentation/kbuild/kbuild.txt
@@ -65,6 +65,22 @@ INSTALL_PATH
INSTALL_PATH specifies where to place the updated kernel and system map
images. Default is /boot, but you can set it to other values.
+INSTALLKERNEL
+--------------------------------------------------
+Install script called when using "make install".
+The default name is "installkernel".
+
+The script will be called with the following arguments:
+ $1 - kernel version
+ $2 - kernel image file
+ $3 - kernel map file
+ $4 - default install path (use root directory if blank)
+
+The implementation of "make install" is architecture specific
+and it may differ from the above.
+
+INSTALLKERNEL is provided to enable the possibility to
+specify a custom installer when cross compiling a kernel.
MODLIB
--------------------------------------------------
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index d76cfd8712e..71c602d6168 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -18,6 +18,7 @@ This document describes the Linux kernel Makefiles.
--- 3.9 Dependency tracking
--- 3.10 Special Rules
--- 3.11 $(CC) support functions
+ --- 3.12 $(LD) support functions
=== 4 Host Program support
--- 4.1 Simple Host Program
@@ -435,14 +436,14 @@ more details, with real examples.
The second argument is optional, and if supplied will be used
if first argument is not supported.
- ld-option
- ld-option is used to check if $(CC) when used to link object files
+ cc-ldoption
+ cc-ldoption is used to check if $(CC) when used to link object files
supports the given option. An optional second option may be
specified if the first option is not supported.
Example:
#arch/i386/kernel/Makefile
- vsyscall-flags += $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
In the above example, vsyscall-flags will be assigned the option
-Wl$(comma)--hash-style=sysv if it is supported by $(CC).
@@ -570,6 +571,19 @@ more details, with real examples.
endif
endif
+--- 3.12 $(LD) support functions
+
+ ld-option
+ ld-option is used to check if $(LD) supports the supplied option.
+ ld-option takes two options as arguments.
+ The second argument is an optional option that can be used if the
+ first option is not supported by $(LD).
+
+ Example:
+ #Makefile
+ LDFLAGS_vmlinux += $(call ld-option, -X)
+
+
=== 4 Host Program support
Kbuild supports building executables on the host for use during the
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 1458448436c..62682500878 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -96,13 +96,16 @@ handles that the Linux kernel will allocate. When you get lots
of error messages about running out of file handles, you might
want to increase this limit.
-The three values in file-nr denote the number of allocated
-file handles, the number of unused file handles and the maximum
-number of file handles. When the allocated file handles come
-close to the maximum, but the number of unused file handles is
-significantly greater than 0, you've encountered a peak in your
-usage of file handles and you don't need to increase the maximum.
-
+Historically, the three values in file-nr denoted the number of
+allocated file handles, the number of allocated but unused file
+handles, and the maximum number of file handles. Linux 2.6 always
+reports 0 as the number of free file handles -- this is not an
+error, it just means that the number of allocated file handles
+exactly matches the number of used file handles.
+
+Attempts to allocate more file descriptors than file-max are
+reported with printk, look for "VFS: file-max limit <number>
+reached".
==============================================================
nr_open:
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index b3d8b492274..a028b92001e 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -22,6 +22,7 @@ show up in /proc/sys/kernel:
- callhome [ S390 only ]
- auto_msgmni
- core_pattern
+- core_pipe_limit
- core_uses_pid
- ctrl-alt-del
- dentry-state
@@ -135,6 +136,27 @@ core_pattern is used to specify a core dumpfile pattern name.
==============================================================
+core_pipe_limit:
+
+This sysctl is only applicable when core_pattern is configured to pipe core
+files to a user space helper (when the first character of core_pattern is a '|',
+see above). When collecting cores via a pipe to an application, it is
+occasionally useful for the collecting application to gather data about the
+crashing process from its /proc/pid directory. In order to do this safely, the
+kernel must wait for the collecting process to exit, so as not to remove the
+crashing process's proc files prematurely. This in turn creates the possibility
+that a misbehaving userspace collecting process can block the reaping of a
+crashed process simply by never exiting. This sysctl defends against that. It
+defines how many concurrent crashing processes may be piped to user space
+applications in parallel. If this value is exceeded, then those crashing
+processes above that value are noted via the kernel log and their cores are
+skipped. 0 is a special value, indicating that unlimited processes may be
+captured in parallel, but that no waiting will take place (i.e. the collecting
+process is not guaranteed access to /proc/<crashing pid>/). This value defaults
+to 0.
+
+==============================================================
+
core_uses_pid:
The default coredump filename is "core". By setting
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index e6fb1ec2744..a6e360d2055 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/vm:
- legacy_va_layout
- lowmem_reserve_ratio
- max_map_count
+- memory_failure_early_kill
+- memory_failure_recovery
- min_free_kbytes
- min_slab_ratio
- min_unmapped_ratio
@@ -53,7 +55,6 @@ Currently, these files are in /proc/sys/vm:
- vfs_cache_pressure
- zone_reclaim_mode
-
==============================================================
block_dump
@@ -275,6 +276,44 @@ e.g., up to one or two maps per allocation.
The default value is 65536.
+=============================================================
+
+memory_failure_early_kill:
+
+Control how to kill processes when an uncorrected memory error (typically
+a 2bit error in a memory module) is detected in the background by hardware
+that cannot be handled by the kernel. In some cases (like the page
+still having a valid copy on disk) the kernel will handle the failure
+transparently without affecting any applications. But if there is
+no other up-to-date copy of the data, it will kill processes to prevent any data
+corruptions from propagating.
+
+1: Kill all processes that have the corrupted and not reloadable page mapped
+as soon as the corruption is detected. Note this is not supported
+for a few types of pages, like kernel internally allocated data or
+the swap cache, but works for the majority of user pages.
+
+0: Only unmap the corrupted page from all processes and only kill a process
+who tries to access it.
+
+The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
+handle this if they want to.
+
+This is only active on architectures/platforms with advanced machine
+check handling and depends on the hardware capabilities.
+
+Applications can override this setting individually with the PR_MCE_KILL prctl.
+
+==============================================================
+
+memory_failure_recovery:
+
+Enable memory failure recovery (when supported by the platform)
+
+1: Attempt recovery.
+
+0: Always panic on a memory failure.
+
==============================================================
min_free_kbytes:
diff --git a/Documentation/vm/.gitignore b/Documentation/vm/.gitignore
index 33e8a023df0..09b164a5700 100644
--- a/Documentation/vm/.gitignore
+++ b/Documentation/vm/.gitignore
@@ -1 +1,2 @@
+page-types
slabinfo
diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index f366fa95617..25fadb44876 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
mm start up ... this is a loose form of stability on mm_users. For
example, it is used in copy_mm to protect against a racing tlb_gather_mmu
single address space optimization, so that the zap_page_range (from
-vmtruncate) does not lose sending ipi's to cloned threads that might
+truncate) does not lose sending ipi's to cloned threads that might
be spawned underneath it and go to user mode to drag in pte's into tlbs.
swap_lock
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index 3eda8ea0085..fa1a30d9e9d 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -5,6 +5,7 @@
* Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com>
*/
+#define _LARGEFILE64_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -13,12 +14,33 @@
#include <string.h>
#include <getopt.h>
#include <limits.h>
+#include <assert.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
/*
+ * pagemap kernel ABI bits
+ */
+
+#define PM_ENTRY_BYTES sizeof(uint64_t)
+#define PM_STATUS_BITS 3
+#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
+#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
+#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
+#define PM_PSHIFT_BITS 6
+#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
+#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
+#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
+#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
+
+#define PM_PRESENT PM_STATUS(4LL)
+#define PM_SWAP PM_STATUS(2LL)
+
+
+/*
* kernel page flags
*/
@@ -126,6 +148,14 @@ static int nr_addr_ranges;
static unsigned long opt_offset[MAX_ADDR_RANGES];
static unsigned long opt_size[MAX_ADDR_RANGES];
+#define MAX_VMAS 10240
+static int nr_vmas;
+static unsigned long pg_start[MAX_VMAS];
+static unsigned long pg_end[MAX_VMAS];
+static unsigned long voffset;
+
+static int pagemap_fd;
+
#define MAX_BIT_FILTERS 64
static int nr_bit_filters;
static uint64_t opt_mask[MAX_BIT_FILTERS];
@@ -135,7 +165,6 @@ static int page_size;
#define PAGES_BATCH (64 << 10) /* 64k pages */
static int kpageflags_fd;
-static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
#define HASH_SHIFT 13
#define HASH_SIZE (1 << HASH_SHIFT)
@@ -158,6 +187,11 @@ static uint64_t page_flags[HASH_SIZE];
type __min2 = (y); \
__min1 < __min2 ? __min1 : __min2; })
+#define max_t(type, x, y) ({ \
+ type __max1 = (x); \
+ type __max2 = (y); \
+ __max1 > __max2 ? __max1 : __max2; })
+
static unsigned long pages2mb(unsigned long pages)
{
return (pages * page_size) >> 20;
@@ -224,26 +258,34 @@ static char *page_flag_longname(uint64_t flags)
static void show_page_range(unsigned long offset, uint64_t flags)
{
static uint64_t flags0;
+ static unsigned long voff;
static unsigned long index;
static unsigned long count;
- if (flags == flags0 && offset == index + count) {
+ if (flags == flags0 && offset == index + count &&
+ (!opt_pid || voffset == voff + count)) {
count++;
return;
}
- if (count)
- printf("%lu\t%lu\t%s\n",
+ if (count) {
+ if (opt_pid)
+ printf("%lx\t", voff);
+ printf("%lx\t%lx\t%s\n",
index, count, page_flag_name(flags0));
+ }
flags0 = flags;
index = offset;
+ voff = voffset;
count = 1;
}
static void show_page(unsigned long offset, uint64_t flags)
{
- printf("%lu\t%s\n", offset, page_flag_name(flags));
+ if (opt_pid)
+ printf("%lx\t", voffset);
+ printf("%lx\t%s\n", offset, page_flag_name(flags));
}
static void show_summary(void)
@@ -383,6 +425,8 @@ static void walk_pfn(unsigned long index, unsigned long count)
lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET);
while (count) {
+ uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
+
batch = min_t(unsigned long, count, PAGES_BATCH);
n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES);
if (n == 0)
@@ -404,6 +448,81 @@ static void walk_pfn(unsigned long index, unsigned long count)
}
}
+
+#define PAGEMAP_BATCH 4096
+static unsigned long task_pfn(unsigned long pgoff)
+{
+ static uint64_t buf[PAGEMAP_BATCH];
+ static unsigned long start;
+ static long count;
+ uint64_t pfn;
+
+ if (pgoff < start || pgoff >= start + count) {
+ if (lseek64(pagemap_fd,
+ (uint64_t)pgoff * PM_ENTRY_BYTES,
+ SEEK_SET) < 0) {
+ perror("pagemap seek");
+ exit(EXIT_FAILURE);
+ }
+ count = read(pagemap_fd, buf, sizeof(buf));
+ if (count == 0)
+ return 0;
+ if (count < 0) {
+ perror("pagemap read");
+ exit(EXIT_FAILURE);
+ }
+ if (count % PM_ENTRY_BYTES) {
+ fatal("pagemap read not aligned.\n");
+ exit(EXIT_FAILURE);
+ }
+ count /= PM_ENTRY_BYTES;
+ start = pgoff;
+ }
+
+ pfn = buf[pgoff - start];
+ if (pfn & PM_PRESENT)
+ pfn = PM_PFRAME(pfn);
+ else
+ pfn = 0;
+
+ return pfn;
+}
+
+static void walk_task(unsigned long index, unsigned long count)
+{
+ int i = 0;
+ const unsigned long end = index + count;
+
+ while (index < end) {
+
+ while (pg_end[i] <= index)
+ if (++i >= nr_vmas)
+ return;
+ if (pg_start[i] >= end)
+ return;
+
+ voffset = max_t(unsigned long, pg_start[i], index);
+ index = min_t(unsigned long, pg_end[i], end);
+
+ assert(voffset < index);
+ for (; voffset < index; voffset++) {
+ unsigned long pfn = task_pfn(voffset);
+ if (pfn)
+ walk_pfn(pfn, 1);
+ }
+ }
+}
+
+static void add_addr_range(unsigned long offset, unsigned long size)
+{
+ if (nr_addr_ranges >= MAX_ADDR_RANGES)
+ fatal("too many addr ranges\n");
+
+ opt_offset[nr_addr_ranges] = offset;
+ opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset);
+ nr_addr_ranges++;
+}
+
static void walk_addr_ranges(void)
{
int i;
@@ -415,10 +534,13 @@ static void walk_addr_ranges(void)
}
if (!nr_addr_ranges)
- walk_pfn(0, ULONG_MAX);
+ add_addr_range(0, ULONG_MAX);
for (i = 0; i < nr_addr_ranges; i++)
- walk_pfn(opt_offset[i], opt_size[i]);
+ if (!opt_pid)
+ walk_pfn(opt_offset[i], opt_size[i]);
+ else
+ walk_task(opt_offset[i], opt_size[i]);
close(kpageflags_fd);
}
@@ -446,8 +568,8 @@ static void usage(void)
" -r|--raw Raw mode, for kernel developers\n"
" -a|--addr addr-spec Walk a range of pages\n"
" -b|--bits bits-spec Walk pages with specified bits\n"
-#if 0 /* planned features */
" -p|--pid pid Walk process address space\n"
+#if 0 /* planned features */
" -f|--file filename Walk file address space\n"
#endif
" -l|--list Show page details in ranges\n"
@@ -459,7 +581,7 @@ static void usage(void)
" N+M pages range from N to N+M-1\n"
" N,M pages range from N to M-1\n"
" N, pages range from N to end\n"
-" ,M pages range from 0 to M\n"
+" ,M pages range from 0 to M-1\n"
"bits-spec:\n"
" bit1,bit2 (flags & (bit1|bit2)) != 0\n"
" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n"
@@ -496,21 +618,57 @@ static unsigned long long parse_number(const char *str)
static void parse_pid(const char *str)
{
+ FILE *file;
+ char buf[5000];
+
opt_pid = parse_number(str);
-}
-static void parse_file(const char *name)
-{
+ sprintf(buf, "/proc/%d/pagemap", opt_pid);
+ pagemap_fd = open(buf, O_RDONLY);
+ if (pagemap_fd < 0) {
+ perror(buf);
+ exit(EXIT_FAILURE);
+ }
+
+ sprintf(buf, "/proc/%d/maps", opt_pid);
+ file = fopen(buf, "r");
+ if (!file) {
+ perror(buf);
+ exit(EXIT_FAILURE);
+ }
+
+ while (fgets(buf, sizeof(buf), file) != NULL) {
+ unsigned long vm_start;
+ unsigned long vm_end;
+ unsigned long long pgoff;
+ int major, minor;
+ char r, w, x, s;
+ unsigned long ino;
+ int n;
+
+ n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu",
+ &vm_start,
+ &vm_end,
+ &r, &w, &x, &s,
+ &pgoff,
+ &major, &minor,
+ &ino);
+ if (n < 10) {
+ fprintf(stderr, "unexpected line: %s\n", buf);
+ continue;
+ }
+ pg_start[nr_vmas] = vm_start / page_size;
+ pg_end[nr_vmas] = vm_end / page_size;
+ if (++nr_vmas >= MAX_VMAS) {
+ fprintf(stderr, "too many VMAs\n");
+ break;
+ }
+ }
+ fclose(file);
}
-static void add_addr_range(unsigned long offset, unsigned long size)
+static void parse_file(const char *name)
{
- if (nr_addr_ranges >= MAX_ADDR_RANGES)
- fatal("too much addr ranges\n");
-
- opt_offset[nr_addr_ranges] = offset;
- opt_size[nr_addr_ranges] = size;
- nr_addr_ranges++;
}
static void parse_addr_range(const char *optarg)
@@ -676,8 +834,10 @@ int main(int argc, char *argv[])
}
}
+ if (opt_list && opt_pid)
+ printf("voffset\t");
if (opt_list == 1)
- printf("offset\tcount\tflags\n");
+ printf("offset\tlen\tflags\n");
if (opt_list == 2)
printf("offset\tflags\n");
diff --git a/MAINTAINERS b/MAINTAINERS
index 7c1c0b05b29..e797c4d48cf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -257,12 +257,6 @@ W: http://www.lesswatts.org/projects/acpi/
S: Supported
F: drivers/acpi/fan.c
-ACPI PCI HOTPLUG DRIVER
-M: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
-L: linux-pci@vger.kernel.org
-S: Supported
-F: drivers/pci/hotplug/acpi*
-
ACPI THERMAL DRIVER
M: Zhang Rui <rui.zhang@intel.com>
L: linux-acpi@vger.kernel.org
@@ -2331,7 +2325,9 @@ S: Orphan
F: drivers/hwmon/
HARDWARE RANDOM NUMBER GENERATOR CORE
-S: Orphan
+M: Matt Mackall <mpm@selenic.com>
+M: Herbert Xu <herbert@gondor.apana.org.au>
+S: Odd fixes
F: Documentation/hw_random.txt
F: drivers/char/hw_random/
F: include/linux/hw_random.h
@@ -4003,11 +3999,11 @@ F: Documentation/PCI/
F: drivers/pci/
F: include/linux/pci*
-PCIE HOTPLUG DRIVER
-M: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
+PCI HOTPLUG
+M: Jesse Barnes <jbarnes@virtuousgeek.org>
L: linux-pci@vger.kernel.org
S: Supported
-F: drivers/pci/pcie/
+F: drivers/pci/hotplug
PCMCIA SUBSYSTEM
P: Linux PCMCIA Team
@@ -4670,12 +4666,6 @@ F: drivers/serial/serial_lh7a40x.c
F: drivers/usb/gadget/lh7a40*
F: drivers/usb/host/ohci-lh7a40*
-SHPC HOTPLUG DRIVER
-M: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
-L: linux-pci@vger.kernel.org
-S: Supported
-F: drivers/pci/hotplug/shpchp*
-
SIMPLE FIRMWARE INTERFACE (SFI)
P: Len Brown
M: lenb@kernel.org
@@ -4687,7 +4677,6 @@ F: arch/x86/kernel/*sfi*
F: drivers/sfi/
F: include/linux/sfi*.h
-
SIMTEC EB110ATX (Chalice CATS)
P: Ben Dooks
M: Vincent Sanders <support@simtec.co.uk>
diff --git a/Makefile b/Makefile
index 433493a2b77..f908accd332 100644
--- a/Makefile
+++ b/Makefile
@@ -179,9 +179,46 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
# Alternatively CROSS_COMPILE can be set in the environment.
# Default value for CROSS_COMPILE is not to prefix executables
# Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile
+#
+# To force ARCH and CROSS_COMPILE settings include kernel.* files
+# in the kernel tree - do not patch this file.
export KBUILD_BUILDHOST := $(SUBARCH)
-ARCH ?= $(SUBARCH)
-CROSS_COMPILE ?=
+
+# Kbuild saves the ARCH and CROSS_COMPILE settings in kernel.* files.
+# Restore these settings and check that user did not specify
+# conflicting values.
+
+saved_arch := $(shell cat include/generated/kernel.arch 2> /dev/null)
+saved_cross := $(shell cat include/generated/kernel.cross 2> /dev/null)
+
+ifneq ($(CROSS_COMPILE),)
+ ifneq ($(saved_cross),)
+ ifneq ($(CROSS_COMPILE),$(saved_cross))
+ $(error CROSS_COMPILE changed from \
+ "$(saved_cross)" to \
+ to "$(CROSS_COMPILE)". \
+ Use "make mrproper" to fix it up)
+ endif
+ endif
+else
+ CROSS_COMPILE := $(saved_cross)
+endif
+
+ifneq ($(ARCH),)
+ ifneq ($(saved_arch),)
+ ifneq ($(saved_arch),$(ARCH))
+ $(error ARCH changed from \
+ "$(saved_arch)" to "$(ARCH)". \
+ Use "make mrproper" to fix it up)
+ endif
+ endif
+else
+ ifneq ($(saved_arch),)
+ ARCH := $(saved_arch)
+ else
+ ARCH := $(SUBARCH)
+ endif
+endif
# Architecture as present in compile.h
UTS_MACHINE := $(ARCH)
@@ -315,6 +352,7 @@ OBJCOPY = $(CROSS_COMPILE)objcopy
OBJDUMP = $(CROSS_COMPILE)objdump
AWK = awk
GENKSYMS = scripts/genksyms/genksyms
+INSTALLKERNEL := installkernel
DEPMOD = /sbin/depmod
KALLSYMS = scripts/kallsyms
PERL = perl
@@ -353,7 +391,8 @@ KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE
+export CPP AR NM STRIP OBJCOPY OBJDUMP
+export MAKE AWK GENKSYMS INSTALLKERNEL PERL UTS_MACHINE
export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
@@ -444,6 +483,11 @@ ifeq ($(config-targets),1)
include $(srctree)/arch/$(SRCARCH)/Makefile
export KBUILD_DEFCONFIG KBUILD_KCONFIG
+# save ARCH & CROSS_COMPILE settings
+$(shell mkdir -p include/generated && \
+ echo $(ARCH) > include/generated/kernel.arch && \
+ echo $(CROSS_COMPILE) > include/generated/kernel.cross)
+
config: scripts_basic outputmakefile FORCE
$(Q)mkdir -p include/linux include/config
$(Q)$(MAKE) $(build)=scripts/kconfig $@
@@ -571,6 +615,9 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
# revert to pre-gcc-4.4 behaviour of .eh_frame
KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm)
+# conserve stack if available
+KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)
+
# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
# But warn user when we do so
warn-assign = \
@@ -591,12 +638,12 @@ endif
# Use --build-id when available.
LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\
- $(call ld-option, -Wl$(comma)--build-id,))
+ $(call cc-ldoption, -Wl$(comma)--build-id,))
LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
-LDFLAGS_vmlinux += -X
+LDFLAGS_vmlinux += $(call ld-option, -X,)
endif
# Default kernel image to build when no specific target is given.
@@ -980,11 +1027,6 @@ prepare0: archprepare FORCE
# All the preparing..
prepare: prepare0
-# Leave this as default for preprocessing vmlinux.lds.S, which is now
-# done in arch/$(ARCH)/kernel/Makefile
-
-export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
-
# The asm symlink changes when $(ARCH) changes.
# Detect this and ask user to run make mrproper
# If asm is a stale symlink (point to dir that does not exist) remove it
diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h
index 25da0017ec8..e42823e954a 100644
--- a/arch/alpha/include/asm/fcntl.h
+++ b/arch/alpha/include/asm/fcntl.h
@@ -26,6 +26,8 @@
#define F_GETOWN 6 /* for sockets. */
#define F_SETSIG 10 /* for sockets. */
#define F_GETSIG 11 /* for sockets. */
+#define F_SETOWN_EX 12
+#define F_GETOWN_EX 13
/* for posix fcntl() and lockf() */
#define F_RDLCK 1
diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h
index 547e90951ce..3f390e8cc0b 100644
--- a/arch/alpha/include/asm/smp.h
+++ b/arch/alpha/include/asm/smp.h
@@ -47,7 +47,7 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS];
extern int smp_num_cpus;
extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#else /* CONFIG_SMP */
diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h
index b4f284c72ff..36b3a30ba0e 100644
--- a/arch/alpha/include/asm/topology.h
+++ b/arch/alpha/include/asm/topology.h
@@ -22,23 +22,6 @@ static inline int cpu_to_node(int cpu)
return node;
}
-static inline cpumask_t node_to_cpumask(int node)
-{
- cpumask_t node_cpu_mask = CPU_MASK_NONE;
- int cpu;
-
- for_each_online_cpu(cpu) {
- if (cpu_to_node(cpu) == node)
- cpu_set(cpu, node_cpu_mask);
- }
-
-#ifdef DEBUG_NUMA
- printk("node %d: cpu_mask: %016lx\n", node, node_cpu_mask);
-#endif
-
- return node_cpu_mask;
-}
-
extern struct cpumask node_to_cpumask_map[];
/* FIXME: This is dumb, recalculating every time. But simple. */
static const struct cpumask *cpumask_of_node(int node)
@@ -55,7 +38,6 @@ static const struct cpumask *cpumask_of_node(int node)
return &node_to_cpumask_map[node];
}
-#define pcibus_to_cpumask(bus) (cpu_online_map)
#define cpumask_of_pcibus(bus) (cpu_online_mask)
#endif /* !CONFIG_NUMA */
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index e302daecbe5..8e059e58b0a 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -1016,7 +1016,7 @@ marvel_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *m
{
struct marvel_agp_aperture *aper = agp->aperture.sysdata;
return iommu_bind(aper->arena, aper->pg_start + pg_start,
- mem->page_count, mem->memory);
+ mem->page_count, mem->pages);
}
static int
diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c
index 319fcb74611..76686497b1e 100644
--- a/arch/alpha/kernel/core_titan.c
+++ b/arch/alpha/kernel/core_titan.c
@@ -680,7 +680,7 @@ titan_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *me
{
struct titan_agp_aperture *aper = agp->aperture.sysdata;
return iommu_bind(aper->arena, aper->pg_start + pg_start,
- mem->page_count, mem->memory);
+ mem->page_count, mem->pages);
}
static int
diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h
index 00edd04b585..85457b2d451 100644
--- a/arch/alpha/kernel/pci_impl.h
+++ b/arch/alpha/kernel/pci_impl.h
@@ -198,7 +198,7 @@ extern unsigned long size_for_memory(unsigned long max);
extern int iommu_reserve(struct pci_iommu_arena *, long, long);
extern int iommu_release(struct pci_iommu_arena *, long, long);
-extern int iommu_bind(struct pci_iommu_arena *, long, long, unsigned long *);
+extern int iommu_bind(struct pci_iommu_arena *, long, long, struct page **);
extern int iommu_unbind(struct pci_iommu_arena *, long, long);
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index d15aedfe606..8449504f5e0 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -876,7 +876,7 @@ iommu_release(struct pci_iommu_arena *arena, long pg_start, long pg_count)
int
iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count,
- unsigned long *physaddrs)
+ struct page **pages)
{
unsigned long flags;
unsigned long *ptes;
@@ -896,7 +896,7 @@ iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count,
}
for(i = 0, j = pg_start; i < pg_count; i++, j++)
- ptes[j] = mk_iommu_pte(physaddrs[i]);
+ ptes[j] = mk_iommu_pte(page_to_phys(pages[i]));
spin_unlock_irqrestore(&arena->lock, flags);
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 3a2fb7a02db..289039bb6bb 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -19,7 +19,6 @@
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/user.h>
-#include <linux/utsname.h>
#include <linux/time.h>
#include <linux/major.h>
#include <linux/stat.h>
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index b1fe5674c3a..42aa078a5e4 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -548,16 +548,16 @@ setup_profiling_timer(unsigned int multiplier)
static void
-send_ipi_message(cpumask_t to_whom, enum ipi_message_type operation)
+send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
{
int i;
mb();
- for_each_cpu_mask(i, to_whom)
+ for_each_cpu(i, to_whom)
set_bit(operation, &ipi_data[i].bits);
mb();
- for_each_cpu_mask(i, to_whom)
+ for_each_cpu(i, to_whom)
wripir(i);
}
@@ -624,7 +624,7 @@ smp_send_reschedule(int cpu)
printk(KERN_WARNING
"smp_send_reschedule: Sending IPI to self.\n");
#endif
- send_ipi_message(cpumask_of_cpu(cpu), IPI_RESCHEDULE);
+ send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
}
void
@@ -636,17 +636,17 @@ smp_send_stop(void)
if (hard_smp_processor_id() != boot_cpu_id)
printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n");
#endif
- send_ipi_message(to_whom, IPI_CPU_STOP);
+ send_ipi_message(&to_whom, IPI_CPU_STOP);
}
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
send_ipi_message(mask, IPI_CALL_FUNC);
}
void arch_send_call_function_single_ipi(int cpu)
{
- send_ipi_message(cpumask_of_cpu(cpu), IPI_CALL_FUNC_SINGLE);
+ send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
}
static void
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 54661125a8b..a73caaf6676 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -14,7 +14,7 @@ LDFLAGS_vmlinux :=-p --no-undefined -X
ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
LDFLAGS_vmlinux += --be8
endif
-CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
+
OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
GZFLAGS :=-9
#KBUILD_CFLAGS +=-pipe
@@ -279,7 +279,7 @@ define archhelp
echo ' (supply initrd image via make variable INITRD=<path>)'
echo ' install - Install uncompressed kernel'
echo ' zinstall - Install compressed kernel'
- echo ' Install using (your) ~/bin/installkernel or'
- echo ' (distribution) /sbin/installkernel or'
+ echo ' Install using (your) ~/bin/$(INSTALLKERNEL) or'
+ echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
echo ' install to $$(INSTALL_PATH) and run lilo'
endef
diff --git a/arch/arm/boot/install.sh b/arch/arm/boot/install.sh
index 9f9bed20734..06ea7d42ce8 100644
--- a/arch/arm/boot/install.sh
+++ b/arch/arm/boot/install.sh
@@ -21,8 +21,8 @@
#
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
if [ "$(basename $2)" = "zImage" ]; then
# Compressed install
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 1a711ea8418..fd03fb63a33 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -334,14 +334,14 @@ static inline void outer_flush_range(unsigned long start, unsigned long end)
#ifndef CONFIG_CPU_CACHE_VIPT
static inline void flush_cache_mm(struct mm_struct *mm)
{
- if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
__cpuc_flush_user_all();
}
static inline void
flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
vma->vm_flags);
}
@@ -349,7 +349,7 @@ flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long
static inline void
flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
{
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
unsigned long addr = user_addr & PAGE_MASK;
__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
}
@@ -360,7 +360,7 @@ flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
unsigned long uaddr, void *kaddr,
unsigned long len, int write)
{
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
unsigned long addr = (unsigned long)kaddr;
__cpuc_coherent_kern_range(addr, addr + len);
}
diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h
index 83e6ba338e2..1a8c7279a28 100644
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ b/arch/arm/include/asm/hardware/iop3xx-adma.h
@@ -187,11 +187,74 @@ union iop3xx_desc {
void *ptr;
};
+/* No support for p+q operations */
+static inline int
+iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+ BUG();
+ return 0;
+}
+
+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ BUG();
+}
+
+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+ BUG();
+}
+
+static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+ dma_addr_t addr, unsigned char coef)
+{
+ BUG();
+}
+
+static inline int
+iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+ BUG();
+ return 0;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ BUG();
+}
+
+static inline void
+iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
+{
+ BUG();
+}
+
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+ dma_addr_t *src)
+{
+ BUG();
+}
+
static inline int iop_adma_get_max_xor(void)
{
return 32;
}
+static inline int iop_adma_get_max_pq(void)
+{
+ BUG();
+ return 0;
+}
+
static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
{
int id = chan->device->id;
@@ -332,6 +395,11 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
return slot_cnt;
}
+static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
+{
+ return 0;
+}
+
static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
@@ -349,6 +417,14 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
return 0;
}
+
+static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *chan)
+{
+ BUG();
+ return 0;
+}
+
static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
@@ -756,13 +832,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
hw_desc->src[0] = val;
}
-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
{
struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
- return desc_ctrl.zero_result_err;
+ return desc_ctrl.zero_result_err << SUM_CHECK_P;
}
static inline void iop_chan_append(struct iop_adma_chan *chan)
diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h
index 385c6e8cbbd..59b8c3892f7 100644
--- a/arch/arm/include/asm/hardware/iop_adma.h
+++ b/arch/arm/include/asm/hardware/iop_adma.h
@@ -86,6 +86,7 @@ struct iop_adma_chan {
* @idx: pool index
* @unmap_src_cnt: number of xor sources
* @unmap_len: transaction bytecount
+ * @tx_list: list of descriptors that are associated with one operation
* @async_tx: support for the async_tx api
* @group_list: list of slots that make up a multi-descriptor transaction
* for example transfer lengths larger than the supported hw max
@@ -102,10 +103,12 @@ struct iop_adma_desc_slot {
u16 idx;
u16 unmap_src_cnt;
size_t unmap_len;
+ struct list_head tx_list;
struct dma_async_tx_descriptor async_tx;
union {
u32 *xor_check_result;
u32 *crc32_result;
+ u32 *pq_check_result;
};
};
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index bcdb9291ef0..de6cefb329d 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -103,14 +103,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
#ifdef CONFIG_SMP
/* check for possible thread migration */
- if (!cpus_empty(next->cpu_vm_mask) && !cpu_isset(cpu, next->cpu_vm_mask))
+ if (!cpumask_empty(mm_cpumask(next)) &&
+ !cpumask_test_cpu(cpu, mm_cpumask(next)))
__flush_icache_all();
#endif
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask) || prev != next) {
+ if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) {
check_context(next);
cpu_switch_mm(next->pgd, next);
if (cache_is_vivt())
- cpu_clear(cpu, prev->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
}
#endif
}
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index a06e735b262..e0d763be184 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -93,7 +93,6 @@ extern void platform_cpu_enable(unsigned int cpu);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
/*
* show local interrupt info
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index c964f3fc3bc..a45ab5dd825 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -350,7 +350,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
if (tlb_flag(TLB_WB))
dsb();
- if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
if (tlb_flag(TLB_V3_FULL))
asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc");
if (tlb_flag(TLB_V4_U_FULL))
@@ -388,7 +388,7 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
if (tlb_flag(TLB_WB))
dsb();
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
if (tlb_flag(TLB_V3_PAGE))
asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (uaddr) : "cc");
if (tlb_flag(TLB_V4_U_PAGE))
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 3213c9382b1..c446aeff7b8 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -2,7 +2,8 @@
# Makefile for the linux kernel.
#
-AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
+AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
ifdef CONFIG_DYNAMIC_FTRACE
CFLAGS_REMOVE_ftrace.o = -pg
diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c
index 3f470866bb8..e7cbb50dc35 100644
--- a/arch/arm/kernel/init_task.c
+++ b/arch/arm/kernel/init_task.c
@@ -24,9 +24,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
*
* The things we do for performance..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index de885fd256c..e0d32770bb3 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -189,7 +189,7 @@ int __cpuexit __cpu_disable(void)
read_lock(&tasklist_lock);
for_each_process(p) {
if (p->mm)
- cpu_clear(cpu, p->mm->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
}
read_unlock(&tasklist_lock);
@@ -257,7 +257,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
atomic_inc(&mm->mm_users);
atomic_inc(&mm->mm_count);
current->active_mm = mm;
- cpu_set(cpu, mm->cpu_vm_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(mm));
cpu_switch_mm(mm->pgd, mm);
enter_lazy_tlb(mm, current);
local_flush_tlb_all();
@@ -643,7 +643,7 @@ void flush_tlb_all(void)
void flush_tlb_mm(struct mm_struct *mm)
{
if (tlb_ops_need_broadcast())
- on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask);
+ on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm));
else
local_flush_tlb_mm(mm);
}
@@ -654,7 +654,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
struct tlb_args ta;
ta.ta_vma = vma;
ta.ta_start = uaddr;
- on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+ on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm));
} else
local_flush_tlb_page(vma, uaddr);
}
@@ -677,7 +677,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
ta.ta_vma = vma;
ta.ta_start = start;
ta.ta_end = end;
- on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+ on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm));
} else
local_flush_tlb_range(vma, start, end);
}
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index b3ec641b5cf..78ecaac6520 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -25,7 +25,6 @@
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/ipc.h>
#include <linux/uaccess.h>
diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c
index 55719a97427..fb5c23af101 100644
--- a/arch/arm/mach-at91/at91sam9263_devices.c
+++ b/arch/arm/mach-at91/at91sam9263_devices.c
@@ -757,6 +757,42 @@ void __init at91_add_device_ac97(struct ac97c_platform_data *data)
void __init at91_add_device_ac97(struct ac97c_platform_data *data) {}
#endif
+/* --------------------------------------------------------------------
+ * CAN Controller
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_CAN_AT91) || defined(CONFIG_CAN_AT91_MODULE)
+static struct resource can_resources[] = {
+ [0] = {
+ .start = AT91SAM9263_BASE_CAN,
+ .end = AT91SAM9263_BASE_CAN + SZ_16K - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .start = AT91SAM9263_ID_CAN,
+ .end = AT91SAM9263_ID_CAN,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct platform_device at91sam9263_can_device = {
+ .name = "at91_can",
+ .id = -1,
+ .resource = can_resources,
+ .num_resources = ARRAY_SIZE(can_resources),
+};
+
+void __init at91_add_device_can(struct at91_can_data *data)
+{
+ at91_set_A_periph(AT91_PIN_PA13, 0); /* CANTX */
+ at91_set_A_periph(AT91_PIN_PA14, 0); /* CANRX */
+ at91sam9263_can_device.dev.platform_data = data;
+
+ platform_device_register(&at91sam9263_can_device);
+}
+#else
+void __init at91_add_device_can(struct at91_can_data *data) {}
+#endif
/* --------------------------------------------------------------------
* LCD Controller
diff --git a/arch/arm/mach-at91/board-sam9263ek.c b/arch/arm/mach-at91/board-sam9263ek.c
index 26f1aa6049a..2d867fb0630 100644
--- a/arch/arm/mach-at91/board-sam9263ek.c
+++ b/arch/arm/mach-at91/board-sam9263ek.c
@@ -400,6 +400,23 @@ static struct gpio_led ek_pwm_led[] = {
}
};
+/*
+ * CAN
+ */
+static void sam9263ek_transceiver_switch(int on)
+{
+ if (on) {
+ at91_set_gpio_output(AT91_PIN_PA18, 1); /* CANRXEN */
+ at91_set_gpio_output(AT91_PIN_PA19, 0); /* CANRS */
+ } else {
+ at91_set_gpio_output(AT91_PIN_PA18, 0); /* CANRXEN */
+ at91_set_gpio_output(AT91_PIN_PA19, 1); /* CANRS */
+ }
+}
+
+static struct at91_can_data ek_can_data = {
+ .transceiver_switch = sam9263ek_transceiver_switch,
+};
static void __init ek_board_init(void)
{
@@ -431,6 +448,8 @@ static void __init ek_board_init(void)
/* LEDs */
at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds));
at91_pwm_leds(ek_pwm_led, ARRAY_SIZE(ek_pwm_led));
+ /* CAN */
+ at91_add_device_can(&ek_can_data);
}
MACHINE_START(AT91SAM9263EK, "Atmel AT91SAM9263-EK")
diff --git a/arch/arm/mach-at91/include/mach/board.h b/arch/arm/mach-at91/include/mach/board.h
index 583f38a38df..2f4fcedc02b 100644
--- a/arch/arm/mach-at91/include/mach/board.h
+++ b/arch/arm/mach-at91/include/mach/board.h
@@ -188,6 +188,12 @@ extern void __init at91_add_device_isi(void);
/* Touchscreen Controller */
extern void __init at91_add_device_tsadcc(void);
+/* CAN */
+struct at91_can_data {
+ void (*transceiver_switch)(int on);
+};
+extern void __init at91_add_device_can(struct at91_can_data *data);
+
/* LEDs */
extern void __init at91_init_leds(u8 cpu_led, u8 timer_led);
extern void __init at91_gpio_leds(struct gpio_led *leds, int nr);
diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h
index 5722e86f217..6d3782d85a9 100644
--- a/arch/arm/mach-iop13xx/include/mach/adma.h
+++ b/arch/arm/mach-iop13xx/include/mach/adma.h
@@ -150,6 +150,8 @@ static inline int iop_adma_get_max_xor(void)
return 16;
}
+#define iop_adma_get_max_pq iop_adma_get_max_xor
+
static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
{
return __raw_readl(ADMA_ADAR(chan));
@@ -211,7 +213,10 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
#define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
+#define IOP_ADMA_PQ_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o)
+#define iop_chan_pq_slot_count iop_chan_xor_slot_count
+#define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count
static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
@@ -220,6 +225,13 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
return hw_desc->dest_addr;
}
+static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *chan)
+{
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+ return hw_desc->q_dest_addr;
+}
+
static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
@@ -319,6 +331,58 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
return 1;
}
+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+ union {
+ u32 value;
+ struct iop13xx_adma_desc_ctrl field;
+ } u_desc_ctrl;
+
+ u_desc_ctrl.value = 0;
+ u_desc_ctrl.field.src_select = src_cnt - 1;
+ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
+ u_desc_ctrl.field.pq_xfer_en = 1;
+ u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
+ u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+ hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
+static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
+{
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+ union {
+ u32 value;
+ struct iop13xx_adma_desc_ctrl field;
+ } u_desc_ctrl;
+
+ u_desc_ctrl.value = hw_desc->desc_ctrl;
+ return u_desc_ctrl.field.pq_xfer_en;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+ union {
+ u32 value;
+ struct iop13xx_adma_desc_ctrl field;
+ } u_desc_ctrl;
+
+ u_desc_ctrl.value = 0;
+ u_desc_ctrl.field.src_select = src_cnt - 1;
+ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
+ u_desc_ctrl.field.zero_result = 1;
+ u_desc_ctrl.field.status_write_back_en = 1;
+ u_desc_ctrl.field.pq_xfer_en = 1;
+ u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
+ u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+ hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
u32 byte_count)
@@ -351,6 +415,7 @@ iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
}
}
+#define iop_desc_set_pq_zero_sum_byte_count iop_desc_set_zero_sum_byte_count
static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
@@ -361,6 +426,16 @@ static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
hw_desc->upper_dest_addr = 0;
}
+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+
+ hw_desc->dest_addr = addr[0];
+ hw_desc->q_dest_addr = addr[1];
+ hw_desc->upper_dest_addr = 0;
+}
+
static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
dma_addr_t addr)
{
@@ -389,6 +464,29 @@ static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
}
static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+ dma_addr_t addr, unsigned char coef)
+{
+ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
+ struct iop13xx_adma_src *src;
+ int i = 0;
+
+ do {
+ iter = iop_hw_desc_slot_idx(hw_desc, i);
+ src = &iter->src[src_idx];
+ src->src_addr = addr;
+ src->pq_upper_src_addr = 0;
+ src->pq_dmlt = coef;
+ slot_cnt -= slots_per_op;
+ if (slot_cnt) {
+ i += slots_per_op;
+ addr += IOP_ADMA_PQ_MAX_BYTE_COUNT;
+ }
+ } while (slot_cnt);
+}
+
+static inline void
iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
@@ -399,6 +497,15 @@ iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
}
#define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+ dma_addr_t *src)
+{
+ iop_desc_set_xor_src_addr(desc, pq_idx, src[pq_idx]);
+ iop_desc_set_xor_src_addr(desc, pq_idx+1, src[pq_idx+1]);
+}
static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
u32 next_desc_addr)
@@ -428,18 +535,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
hw_desc->block_fill_data = val;
}
-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
+ enum sum_check_flags flags;
BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
- if (desc_ctrl.pq_xfer_en)
- return byte_count.zero_result_err_q;
- else
- return byte_count.zero_result_err;
+ flags = byte_count.zero_result_err_q << SUM_CHECK_Q;
+ flags |= byte_count.zero_result_err << SUM_CHECK_P;
+
+ return flags;
}
static inline void iop_chan_append(struct iop_adma_chan *chan)
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index bee42c609df..5c147fb66a0 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -477,10 +477,8 @@ void __init iop13xx_platform_init(void)
plat_data = &iop13xx_adma_0_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+ dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
- dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
break;
case IOP13XX_INIT_ADMA_1:
@@ -489,10 +487,8 @@ void __init iop13xx_platform_init(void)
plat_data = &iop13xx_adma_1_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+ dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
- dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
break;
case IOP13XX_INIT_ADMA_2:
@@ -501,14 +497,11 @@ void __init iop13xx_platform_init(void)
plat_data = &iop13xx_adma_2_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+ dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
- dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
- dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
- dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
+ dma_cap_set(DMA_PQ, plat_data->cap_mask);
+ dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
break;
}
}
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index fc84fcc7438..6bda76a4319 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -59,6 +59,6 @@ void __new_context(struct mm_struct *mm)
}
spin_unlock(&cpu_asid_lock);
- mm->cpu_vm_mask = cpumask_of_cpu(smp_processor_id());
+ cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id()));
mm->context.id = asid;
}
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 575f3ad722e..b27942909b2 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -50,7 +50,7 @@ static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)
void flush_cache_mm(struct mm_struct *mm)
{
if (cache_is_vivt()) {
- if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
__cpuc_flush_user_all();
return;
}
@@ -73,7 +73,7 @@ void flush_cache_mm(struct mm_struct *mm)
void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
if (cache_is_vivt()) {
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
vma->vm_flags);
return;
@@ -97,7 +97,7 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
{
if (cache_is_vivt()) {
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
unsigned long addr = user_addr & PAGE_MASK;
__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
}
@@ -113,7 +113,7 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
unsigned long len, int write)
{
if (cache_is_vivt()) {
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
unsigned long addr = (unsigned long)kaddr;
__cpuc_coherent_kern_range(addr, addr + len);
}
@@ -126,7 +126,7 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
}
/* VIPT non-aliasing cache */
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask) &&
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)) &&
vma->vm_flags & VM_EXEC) {
unsigned long addr = (unsigned long)kaddr;
/* only flushing the kernel mapping on non-aliasing VIPT */
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
index 3c127aabe21..1ff6a37e893 100644
--- a/arch/arm/plat-iop/adma.c
+++ b/arch/arm/plat-iop/adma.c
@@ -179,7 +179,6 @@ static int __init iop3xx_adma_cap_init(void)
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
#else
dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
- dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
#endif
@@ -188,7 +187,6 @@ static int __init iop3xx_adma_cap_init(void)
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
#else
dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
- dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
#endif
@@ -198,7 +196,7 @@ static int __init iop3xx_adma_cap_init(void)
dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
#else
dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
- dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
+ dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
#endif
diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c
index 57ec9f2dcd9..6b2343e6fe3 100644
--- a/arch/avr32/kernel/init_task.c
+++ b/arch/avr32/kernel/init_task.c
@@ -18,9 +18,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
/*
* Initial thread structure. Must be aligned on an 8192-byte boundary.
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
index 376f18c4a6c..94925641e53 100644
--- a/arch/avr32/mm/init.c
+++ b/arch/avr32/mm/init.c
@@ -24,11 +24,9 @@
#include <asm/setup.h>
#include <asm/sections.h>
-#define __page_aligned __attribute__((section(".data.page_aligned")))
-
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned;
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_data;
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);
diff --git a/arch/blackfin/Makefile b/arch/blackfin/Makefile
index 6f9533c3d75..f063b772934 100644
--- a/arch/blackfin/Makefile
+++ b/arch/blackfin/Makefile
@@ -155,7 +155,7 @@ define archhelp
echo '* vmImage.gz - Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.gz)'
echo ' vmImage.lzma - Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.lzma)'
echo ' install - Install kernel using'
- echo ' (your) ~/bin/$(CROSS_COMPILE)installkernel or'
- echo ' (distribution) PATH: $(CROSS_COMPILE)installkernel or'
+ echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ echo ' (distribution) PATH: $(INSTALLKERNEL) or'
echo ' install to $$(INSTALL_PATH)'
endef
diff --git a/arch/blackfin/boot/install.sh b/arch/blackfin/boot/install.sh
index 9560a6b2910..e2c6e40902b 100644
--- a/arch/blackfin/boot/install.sh
+++ b/arch/blackfin/boot/install.sh
@@ -36,9 +36,9 @@ verify "$3"
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if which ${CROSS_COMPILE}installkernel >/dev/null 2>&1; then
- exec ${CROSS_COMPILE}installkernel "$@"
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if which ${INSTALLKERNEL} >/dev/null 2>&1; then
+ exec ${INSTALLKERNEL} "$@"
fi
# Default install - same as make zlilo
diff --git a/arch/cris/Makefile b/arch/cris/Makefile
index 71e17d3eedd..29c2ceb38a7 100644
--- a/arch/cris/Makefile
+++ b/arch/cris/Makefile
@@ -42,8 +42,6 @@ LD = $(CROSS_COMPILE)ld -mcrislinux
OBJCOPYFLAGS := -O binary -R .note -R .comment -S
-CPPFLAGS_vmlinux.lds = -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE)
-
KBUILD_AFLAGS += -mlinux -march=$(arch-y) $(inc)
KBUILD_CFLAGS += -mlinux -march=$(arch-y) -pipe $(inc)
KBUILD_CPPFLAGS += $(inc)
diff --git a/arch/cris/kernel/Makefile b/arch/cris/kernel/Makefile
index ee7bcd4d20b..b45640b3e60 100644
--- a/arch/cris/kernel/Makefile
+++ b/arch/cris/kernel/Makefile
@@ -3,6 +3,7 @@
# Makefile for the linux kernel.
#
+CPPFLAGS_vmlinux.lds := -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE)
extra-y := vmlinux.lds
obj-y := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index 51dcd04d277..c99aeab7cef 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -45,9 +45,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c
index 1d3df1d9495..3c3e0b336a9 100644
--- a/arch/frv/kernel/init_task.c
+++ b/arch/frv/kernel/init_task.c
@@ -19,9 +19,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c
index be722fc1acf..0d4d3e3a4cf 100644
--- a/arch/frv/kernel/pm.c
+++ b/arch/frv/kernel/pm.c
@@ -150,7 +150,7 @@ static int user_atoi(char __user *ubuf, size_t len)
/*
* Send us to sleep.
*/
-static int sysctl_pm_do_suspend(ctl_table *ctl, int write, struct file *filp,
+static int sysctl_pm_do_suspend(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int retval, mode;
@@ -198,13 +198,13 @@ static int try_set_cmode(int new_cmode)
}
-static int cmode_procctl(ctl_table *ctl, int write, struct file *filp,
+static int cmode_procctl(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int new_cmode;
if (!write)
- return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+ return proc_dointvec(ctl, write, buffer, lenp, fpos);
new_cmode = user_atoi(buffer, *lenp);
@@ -301,13 +301,13 @@ static int try_set_cm(int new_cm)
return 0;
}
-static int p0_procctl(ctl_table *ctl, int write, struct file *filp,
+static int p0_procctl(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int new_p0;
if (!write)
- return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+ return proc_dointvec(ctl, write, buffer, lenp, fpos);
new_p0 = user_atoi(buffer, *lenp);
@@ -345,13 +345,13 @@ static int p0_sysctl(ctl_table *table,
return 1;
}
-static int cm_procctl(ctl_table *ctl, int write, struct file *filp,
+static int cm_procctl(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int new_cm;
if (!write)
- return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+ return proc_dointvec(ctl, write, buffer, lenp, fpos);
new_cm = user_atoi(buffer, *lenp);
diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c
index baadc97f862..2b6b5289cdc 100644
--- a/arch/frv/kernel/sys_frv.c
+++ b/arch/frv/kernel/sys_frv.c
@@ -21,7 +21,6 @@
#include <linux/stat.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/syscalls.h>
#include <linux/ipc.h>
diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c
index 089c65ed6eb..54c1062ee80 100644
--- a/arch/h8300/kernel/init_task.c
+++ b/arch/h8300/kernel/init_task.c
@@ -31,7 +31,6 @@ EXPORT_SYMBOL(init_task);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c
index 2745656dcc5..8cb5d73a0e3 100644
--- a/arch/h8300/kernel/sys_h8300.c
+++ b/arch/h8300/kernel/sys_h8300.c
@@ -17,7 +17,6 @@
#include <linux/syscalls.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/fs.h>
#include <linux/ipc.h>
diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h
index d217d1d4e05..0b3b3997dec 100644
--- a/arch/ia64/include/asm/smp.h
+++ b/arch/ia64/include/asm/smp.h
@@ -127,7 +127,6 @@ extern int is_multithreading_enabled(void);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
#else /* CONFIG_SMP */
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index d0141fbf51d..3ddb4e709db 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -33,7 +33,6 @@
/*
* Returns a bitmask of CPUs on Node 'node'.
*/
-#define node_to_cpumask(node) (node_to_cpu_mask[node])
#define cpumask_of_node(node) (&node_to_cpu_mask[node])
/*
@@ -104,8 +103,6 @@ void build_cpu_to_node_map(void);
#ifdef CONFIG_SMP
#define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id)
#define topology_core_id(cpu) (cpu_data(cpu)->core_id)
-#define topology_core_siblings(cpu) (cpu_core_map[cpu])
-#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#define smt_capable() (smp_num_siblings > 1)
diff --git a/arch/ia64/install.sh b/arch/ia64/install.sh
index 929e780026d..0e932f5dcd1 100644
--- a/arch/ia64/install.sh
+++ b/arch/ia64/install.sh
@@ -21,8 +21,8 @@
# User may have a custom install script
-if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
-if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install - same as make zlilo
diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate
index 1d87f84069b..ab9b03a9adc 100644
--- a/arch/ia64/kernel/Makefile.gate
+++ b/arch/ia64/kernel/Makefile.gate
@@ -10,7 +10,7 @@ quiet_cmd_gate = GATE $@
cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
$(call if_changed,gate)
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index c475fc281be..e253ab8fcbc 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -33,7 +33,8 @@ union {
struct thread_info thread_info;
} s;
unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
-} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
+} init_task_mem asm ("init_task") __init_task_data =
+ {{
.task = INIT_TASK(init_task_mem.s.task),
.thread_info = INIT_THREAD_INFO(init_task_mem.s.task)
}};
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 93ebfea43c6..dabeefe2113 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -302,7 +302,7 @@ smp_flush_tlb_mm (struct mm_struct *mm)
return;
}
- smp_call_function_mask(mm->cpu_vm_mask,
+ smp_call_function_many(mm_cpumask(mm),
(void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
local_irq_disable();
local_finish_flush_tlb_mm(mm);
diff --git a/arch/m32r/boot/compressed/install.sh b/arch/m32r/boot/compressed/install.sh
index 6d72e9e7269..16e5a0a1343 100644
--- a/arch/m32r/boot/compressed/install.sh
+++ b/arch/m32r/boot/compressed/install.sh
@@ -24,8 +24,8 @@
# User may have a custom install script
-if [ -x /sbin/installkernel ]; then
- exec /sbin/installkernel "$@"
+if [ -x /sbin/${INSTALLKERNEL} ]; then
+ exec /sbin/${INSTALLKERNEL} "$@"
fi
if [ "$2" = "zImage" ]; then
diff --git a/arch/m32r/include/asm/mmu_context.h b/arch/m32r/include/asm/mmu_context.h
index 91909e5dd9d..a70a3df3363 100644
--- a/arch/m32r/include/asm/mmu_context.h
+++ b/arch/m32r/include/asm/mmu_context.h
@@ -127,7 +127,7 @@ static inline void switch_mm(struct mm_struct *prev,
if (prev != next) {
#ifdef CONFIG_SMP
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
#endif /* CONFIG_SMP */
/* Set MPTB = next->pgd */
*(volatile unsigned long *)MPTB = (unsigned long)next->pgd;
@@ -135,7 +135,7 @@ static inline void switch_mm(struct mm_struct *prev,
}
#ifdef CONFIG_SMP
else
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask))
+ if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)))
activate_context(next);
#endif /* CONFIG_SMP */
}
diff --git a/arch/m32r/include/asm/smp.h b/arch/m32r/include/asm/smp.h
index b96a6d2ffbc..e67ded1aab9 100644
--- a/arch/m32r/include/asm/smp.h
+++ b/arch/m32r/include/asm/smp.h
@@ -88,7 +88,7 @@ extern void smp_send_timer(void);
extern unsigned long send_IPI_mask_phys(cpumask_t, int, int);
extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#endif /* not __ASSEMBLY__ */
diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c
index fce57e5d3f9..6c42d5f8df5 100644
--- a/arch/m32r/kernel/init_task.c
+++ b/arch/m32r/kernel/init_task.c
@@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index 929e5c9d3ad..1b7598e6f6e 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -85,7 +85,7 @@ void smp_ipi_timer_interrupt(struct pt_regs *);
void smp_local_timer_interrupt(void);
static void send_IPI_allbutself(int, int);
-static void send_IPI_mask(cpumask_t, int, int);
+static void send_IPI_mask(const struct cpumask *, int, int);
unsigned long send_IPI_mask_phys(cpumask_t, int, int);
/*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
@@ -113,7 +113,7 @@ unsigned long send_IPI_mask_phys(cpumask_t, int, int);
void smp_send_reschedule(int cpu_id)
{
WARN_ON(cpu_is_offline(cpu_id));
- send_IPI_mask(cpumask_of_cpu(cpu_id), RESCHEDULE_IPI, 1);
+ send_IPI_mask(cpumask_of(cpu_id), RESCHEDULE_IPI, 1);
}
/*==========================================================================*
@@ -168,7 +168,7 @@ void smp_flush_cache_all(void)
spin_lock(&flushcache_lock);
mask=cpus_addr(cpumask);
atomic_set_mask(*mask, (atomic_t *)&flushcache_cpumask);
- send_IPI_mask(cpumask, INVALIDATE_CACHE_IPI, 0);
+ send_IPI_mask(&cpumask, INVALIDATE_CACHE_IPI, 0);
_flush_cache_copyback_all();
while (flushcache_cpumask)
mb();
@@ -264,7 +264,7 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
preempt_disable();
cpu_id = smp_processor_id();
mmc = &mm->context[cpu_id];
- cpu_mask = mm->cpu_vm_mask;
+ cpu_mask = *mm_cpumask(mm);
cpu_clear(cpu_id, cpu_mask);
if (*mmc != NO_CONTEXT) {
@@ -273,7 +273,7 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
if (mm == current->mm)
activate_context(mm);
else
- cpu_clear(cpu_id, mm->cpu_vm_mask);
+ cpumask_clear_cpu(cpu_id, mm_cpumask(mm));
local_irq_restore(flags);
}
if (!cpus_empty(cpu_mask))
@@ -334,7 +334,7 @@ void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
preempt_disable();
cpu_id = smp_processor_id();
mmc = &mm->context[cpu_id];
- cpu_mask = mm->cpu_vm_mask;
+ cpu_mask = *mm_cpumask(mm);
cpu_clear(cpu_id, cpu_mask);
#ifdef DEBUG_SMP
@@ -424,7 +424,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
* We have to send the IPI only to
* CPUs affected.
*/
- send_IPI_mask(cpumask, INVALIDATE_TLB_IPI, 0);
+ send_IPI_mask(&cpumask, INVALIDATE_TLB_IPI, 0);
while (!cpus_empty(flush_cpumask)) {
/* nothing. lockup detection does not belong here */
@@ -469,7 +469,7 @@ void smp_invalidate_interrupt(void)
if (flush_mm == current->active_mm)
activate_context(flush_mm);
else
- cpu_clear(cpu_id, flush_mm->cpu_vm_mask);
+ cpumask_clear_cpu(cpu_id, mm_cpumask(flush_mm));
} else {
unsigned long va = flush_va;
@@ -546,14 +546,14 @@ static void stop_this_cpu(void *dummy)
for ( ; ; );
}
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
send_IPI_mask(mask, CALL_FUNCTION_IPI, 0);
}
void arch_send_call_function_single_ipi(int cpu)
{
- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNC_SINGLE_IPI, 0);
+ send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_IPI, 0);
}
/*==========================================================================*
@@ -729,7 +729,7 @@ static void send_IPI_allbutself(int ipi_num, int try)
cpumask = cpu_online_map;
cpu_clear(smp_processor_id(), cpumask);
- send_IPI_mask(cpumask, ipi_num, try);
+ send_IPI_mask(&cpumask, ipi_num, try);
}
/*==========================================================================*
@@ -752,7 +752,7 @@ static void send_IPI_allbutself(int ipi_num, int try)
* ---------- --- --------------------------------------------------------
*
*==========================================================================*/
-static void send_IPI_mask(cpumask_t cpumask, int ipi_num, int try)
+static void send_IPI_mask(const struct cpumask *cpumask, int ipi_num, int try)
{
cpumask_t physid_mask, tmp;
int cpu_id, phys_id;
@@ -761,11 +761,11 @@ static void send_IPI_mask(cpumask_t cpumask, int ipi_num, int try)
if (num_cpus <= 1) /* NO MP */
return;
- cpus_and(tmp, cpumask, cpu_online_map);
- BUG_ON(!cpus_equal(cpumask, tmp));
+ cpumask_and(&tmp, cpumask, cpu_online_mask);
+ BUG_ON(!cpumask_equal(cpumask, &tmp));
physid_mask = CPU_MASK_NONE;
- for_each_cpu_mask(cpu_id, cpumask){
+ for_each_cpu(cpu_id, cpumask) {
if ((phys_id = cpu_to_physid(cpu_id)) != -1)
cpu_set(phys_id, physid_mask);
}
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index 655ea1c47a0..e034844cfc0 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -178,7 +178,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
for (phys_id = 0 ; phys_id < nr_cpu ; phys_id++)
physid_set(phys_id, phys_cpu_present_map);
#ifndef CONFIG_HOTPLUG_CPU
- cpu_present_map = cpu_possible_map;
+ init_cpu_present(&cpu_possible_map);
#endif
show_mp_info(nr_cpu);
diff --git a/arch/m68k/install.sh b/arch/m68k/install.sh
index 9c6bae6112e..57d640d4382 100644
--- a/arch/m68k/install.sh
+++ b/arch/m68k/install.sh
@@ -33,8 +33,8 @@ verify "$3"
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install - same as make zlilo
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 72bad65dba3..41230c595a8 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -42,9 +42,9 @@
*/
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-union thread_union init_thread_union
-__attribute__((section(".data.init_task"), aligned(THREAD_SIZE)))
- = { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data
+ __attribute__((aligned(THREAD_SIZE))) =
+ { INIT_THREAD_INFO(init_task) };
/* initial task structure */
struct task_struct init_task = INIT_TASK(init_task);
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 7f54efaf60b..7deb402bfc7 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -20,7 +20,6 @@
#include <linux/syscalls.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/ipc.h>
#include <asm/setup.h>
diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c
index 45e97a207fe..cbf9dc3cc51 100644
--- a/arch/m68knommu/kernel/init_task.c
+++ b/arch/m68knommu/kernel/init_task.c
@@ -31,7 +31,6 @@ EXPORT_SYMBOL(init_task);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c
index 70028163862..efdd090778a 100644
--- a/arch/m68knommu/kernel/sys_m68k.c
+++ b/arch/m68knommu/kernel/sys_m68k.c
@@ -17,7 +17,6 @@
#include <linux/syscalls.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/ipc.h>
#include <linux/fs.h>
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 2db722d80d4..bbd8327f189 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -6,6 +6,7 @@ mainmenu "Linux/Microblaze Kernel Configuration"
config MICROBLAZE
def_bool y
select HAVE_LMB
+ select USB_ARCH_HAS_EHCI
select ARCH_WANT_OPTIONAL_GPIOLIB
config SWAP
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index 8439598d465..34187354304 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -37,12 +37,12 @@ CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare
CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER))
# r31 holds current when in kernel mode
-KBUILD_KERNEL += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
+KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
LDFLAGS :=
LDFLAGS_vmlinux :=
-LIBGCC := $(shell $(CC) $(KBUILD_KERNEL) -print-libgcc-file-name)
+LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
head-y := arch/microblaze/kernel/head.o
libs-y += arch/microblaze/lib/
@@ -53,22 +53,41 @@ core-y += arch/microblaze/platform/
boot := arch/microblaze/boot
+# Are we making a simpleImage.<boardname> target? If so, crack out the boardname
+DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS)))
+
+ifneq ($(DTB),)
+ core-y += $(boot)/
+endif
+
# defines filename extension depending memory management type
ifeq ($(CONFIG_MMU),)
MMU := -nommu
endif
-export MMU
+export MMU DTB
all: linux.bin
+BOOT_TARGETS = linux.bin linux.bin.gz simpleImage.%
+
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
-linux.bin linux.bin.gz: vmlinux
+$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+# Help text for the microblaze boot image targets. Each echo line must be a
+# complete, properly quoted shell string.
define archhelp
- echo '* linux.bin - Create raw binary'
- echo ' linux.bin.gz - Create compressed raw binary'
+ echo '* linux.bin - Create raw binary'
+ echo ' linux.bin.gz - Create compressed raw binary'
+ echo ' simpleImage.<dt> - ELF image with $(arch)/boot/dts/<dt>.dts linked in'
+ echo ' - stripped elf with fdt blob'
+ echo ' simpleImage.<dt>.unstrip - full ELF image with fdt blob'
+ echo ' *_defconfig - Select default config from arch/microblaze/configs'
+ echo ''
+ echo ' Targets with <dt> embed a device tree blob inside the image'
+ echo ' These targets support board with firmware that does not'
+ echo ' support passing a device tree directly. Replace <dt> with the'
+ echo ' name of a dts file from the arch/microblaze/boot/dts/ directory'
+ echo ' (minus the .dts extension).'
endef
diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile
index c2bb043a029..21f13322a4c 100644
--- a/arch/microblaze/boot/Makefile
+++ b/arch/microblaze/boot/Makefile
@@ -2,10 +2,24 @@
# arch/microblaze/boot/Makefile
#
-targets := linux.bin linux.bin.gz
+obj-y += linked_dtb.o
+
+targets := linux.bin linux.bin.gz simpleImage.%
OBJCOPYFLAGS_linux.bin := -O binary
+# Where the DTS files live
+dtstree := $(srctree)/$(src)/dts
+
+# Ensure system.dtb exists
+$(obj)/linked_dtb.o: $(obj)/system.dtb
+
+# Generate system.dtb from $(DTB).dtb
+ifneq ($(DTB),system)
+$(obj)/system.dtb: $(obj)/$(DTB).dtb
+ $(call if_changed,cp)
+endif
+
$(obj)/linux.bin: vmlinux FORCE
[ -n $(CONFIG_INITRAMFS_SOURCE) ] && [ ! -e $(CONFIG_INITRAMFS_SOURCE) ] && \
touch $(CONFIG_INITRAMFS_SOURCE) || echo "No CPIO image"
@@ -16,4 +30,27 @@ $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE
$(call if_changed,gzip)
@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
-clean-kernel += linux.bin linux.bin.gz
+quiet_cmd_cp = CP $< $@$2
+ cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
+
+quiet_cmd_strip = STRIP $@
+ cmd_strip = $(STRIP) -K _start -K _end -K __log_buf -K _fdt_start vmlinux -o $@
+
+$(obj)/simpleImage.%: vmlinux FORCE
+ $(call if_changed,cp,.unstrip)
+ $(call if_changed,strip)
+ @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+
+# Location of the device tree compiler (dtc) binary
+DTC = $(objtree)/scripts/dtc/dtc
+
+# Rule to build device tree blobs
+quiet_cmd_dtc = DTC $@
+ cmd_dtc = $(DTC) -O dtb -o $(obj)/$*.dtb -b 0 -p 1024 $(dtstree)/$*.dts
+
+$(obj)/%.dtb: $(dtstree)/%.dts FORCE
+ $(call if_changed,dtc)
+
+clean-kernel += linux.bin linux.bin.gz simpleImage.*
+
+clean-files += *.dtb
diff --git a/arch/microblaze/boot/dts/system.dts b/arch/microblaze/boot/dts/system.dts
new file mode 120000
index 00000000000..7cb657892f2
--- /dev/null
+++ b/arch/microblaze/boot/dts/system.dts
@@ -0,0 +1 @@
+../../platform/generic/system.dts \ No newline at end of file
diff --git a/arch/microblaze/boot/linked_dtb.S b/arch/microblaze/boot/linked_dtb.S
new file mode 100644
index 00000000000..cb2b537aebe
--- /dev/null
+++ b/arch/microblaze/boot/linked_dtb.S
@@ -0,0 +1,3 @@
+.section __fdt_blob,"a"
+.incbin "arch/microblaze/boot/system.dtb"
+
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index 09c32962b66..bb7c374713a 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.31-rc6
-# Tue Aug 18 11:00:02 2009
+# Linux kernel version: 2.6.31
+# Thu Sep 24 10:28:50 2009
#
CONFIG_MICROBLAZE=y
# CONFIG_SWAP is not set
@@ -42,11 +42,12 @@ CONFIG_SYSVIPC_SYSCTL=y
#
# RCU Subsystem
#
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
# CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=17
@@ -260,6 +261,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
# CONFIG_NETFILTER is not set
# CONFIG_IP_DCCP is not set
# CONFIG_IP_SCTP is not set
+# CONFIG_RDS is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
@@ -357,12 +359,10 @@ CONFIG_NET_ETHERNET=y
# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
# CONFIG_KS8842 is not set
+CONFIG_XILINX_EMACLITE=y
CONFIG_NETDEV_1000=y
CONFIG_NETDEV_10000=y
-
-#
-# Wireless LAN
-#
+CONFIG_WLAN=y
# CONFIG_WLAN_PRE80211 is not set
# CONFIG_WLAN_80211 is not set
@@ -460,6 +460,7 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
# CONFIG_DISPLAY_SUPPORT is not set
# CONFIG_SOUND is not set
# CONFIG_USB_SUPPORT is not set
+CONFIG_USB_ARCH_HAS_EHCI=y
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
# CONFIG_NEW_LEDS is not set
@@ -488,6 +489,7 @@ CONFIG_EXT2_FS=y
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
CONFIG_FILE_LOCKING=y
CONFIG_FSNOTIFY=y
# CONFIG_DNOTIFY is not set
@@ -546,7 +548,6 @@ CONFIG_MISC_FILESYSTEMS=y
# CONFIG_ROMFS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
-# CONFIG_NILFS2_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
@@ -671,18 +672,20 @@ CONFIG_DEBUG_INFO=y
# CONFIG_DEBUG_LIST is not set
# CONFIG_DEBUG_SG is not set
# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
# CONFIG_BOOT_PRINTK_DELAY is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_BACKTRACE_SELF_TEST is not set
# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
# CONFIG_FAULT_INJECTION is not set
# CONFIG_SYSCTL_SYSCALL_CHECK is not set
# CONFIG_PAGE_POISONING is not set
# CONFIG_SAMPLES is not set
# CONFIG_KMEMCHECK is not set
CONFIG_EARLY_PRINTK=y
-CONFIG_HEART_BEAT=y
+# CONFIG_HEART_BEAT is not set
CONFIG_DEBUG_BOOTMEM=y
#
@@ -697,7 +700,6 @@ CONFIG_CRYPTO=y
#
# Crypto core or helper
#
-# CONFIG_CRYPTO_FIPS is not set
# CONFIG_CRYPTO_MANAGER is not set
# CONFIG_CRYPTO_MANAGER2 is not set
# CONFIG_CRYPTO_GF128MUL is not set
@@ -729,11 +731,13 @@ CONFIG_CRYPTO=y
#
# CONFIG_CRYPTO_HMAC is not set
# CONFIG_CRYPTO_XCBC is not set
+# CONFIG_CRYPTO_VMAC is not set
#
# Digest
#
# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_GHASH is not set
# CONFIG_CRYPTO_MD4 is not set
# CONFIG_CRYPTO_MD5 is not set
# CONFIG_CRYPTO_MICHAEL_MIC is not set
diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig
index 8b638615a97..adb839bab70 100644
--- a/arch/microblaze/configs/nommu_defconfig
+++ b/arch/microblaze/configs/nommu_defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.31-rc6
-# Tue Aug 18 10:35:30 2009
+# Linux kernel version: 2.6.31
+# Thu Sep 24 10:29:43 2009
#
CONFIG_MICROBLAZE=y
# CONFIG_SWAP is not set
@@ -44,11 +44,12 @@ CONFIG_BSD_PROCESS_ACCT_V3=y
#
# RCU Subsystem
#
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
# CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=17
@@ -243,6 +244,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
# CONFIG_NETFILTER is not set
# CONFIG_IP_DCCP is not set
# CONFIG_IP_SCTP is not set
+# CONFIG_RDS is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
@@ -272,6 +274,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
# CONFIG_AF_RXRPC is not set
CONFIG_WIRELESS=y
# CONFIG_CFG80211 is not set
+CONFIG_CFG80211_DEFAULT_PS_VALUE=0
CONFIG_WIRELESS_OLD_REGULATORY=y
# CONFIG_WIRELESS_EXT is not set
# CONFIG_LIB80211 is not set
@@ -279,7 +282,6 @@ CONFIG_WIRELESS_OLD_REGULATORY=y
#
# CFG80211 needs to be enabled for MAC80211
#
-CONFIG_MAC80211_DEFAULT_PS_VALUE=0
# CONFIG_WIMAX is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set
@@ -304,6 +306,7 @@ CONFIG_MTD_PARTITIONS=y
# CONFIG_MTD_TESTS is not set
# CONFIG_MTD_REDBOOT_PARTS is not set
CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_OF_PARTS is not set
# CONFIG_MTD_AR7_PARTS is not set
#
@@ -349,6 +352,7 @@ CONFIG_MTD_RAM=y
#
# CONFIG_MTD_COMPLEX_MAPPINGS is not set
# CONFIG_MTD_PHYSMAP is not set
+# CONFIG_MTD_PHYSMAP_OF is not set
CONFIG_MTD_UCLINUX=y
# CONFIG_MTD_PLATRAM is not set
@@ -429,12 +433,10 @@ CONFIG_NET_ETHERNET=y
# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
# CONFIG_KS8842 is not set
+# CONFIG_XILINX_EMACLITE is not set
CONFIG_NETDEV_1000=y
CONFIG_NETDEV_10000=y
-
-#
-# Wireless LAN
-#
+CONFIG_WLAN=y
# CONFIG_WLAN_PRE80211 is not set
# CONFIG_WLAN_80211 is not set
@@ -535,7 +537,7 @@ CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
# CONFIG_USB_ARCH_HAS_OHCI is not set
-# CONFIG_USB_ARCH_HAS_EHCI is not set
+CONFIG_USB_ARCH_HAS_EHCI=y
# CONFIG_USB is not set
# CONFIG_USB_OTG_WHITELIST is not set
# CONFIG_USB_OTG_BLACKLIST_HUB is not set
@@ -579,6 +581,7 @@ CONFIG_FS_POSIX_ACL=y
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
CONFIG_FILE_LOCKING=y
CONFIG_FSNOTIFY=y
# CONFIG_DNOTIFY is not set
@@ -639,7 +642,6 @@ CONFIG_ROMFS_BACKED_BY_BLOCK=y
CONFIG_ROMFS_ON_BLOCK=y
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
-# CONFIG_NILFS2_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
@@ -710,18 +712,20 @@ CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
# CONFIG_BOOT_PRINTK_DELAY is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_BACKTRACE_SELF_TEST is not set
# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
# CONFIG_FAULT_INJECTION is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
# CONFIG_PAGE_POISONING is not set
# CONFIG_DYNAMIC_DEBUG is not set
# CONFIG_SAMPLES is not set
CONFIG_EARLY_PRINTK=y
-CONFIG_HEART_BEAT=y
+# CONFIG_HEART_BEAT is not set
# CONFIG_DEBUG_BOOTMEM is not set
#
@@ -736,7 +740,6 @@ CONFIG_CRYPTO=y
#
# Crypto core or helper
#
-# CONFIG_CRYPTO_FIPS is not set
# CONFIG_CRYPTO_MANAGER is not set
# CONFIG_CRYPTO_MANAGER2 is not set
# CONFIG_CRYPTO_GF128MUL is not set
@@ -768,11 +771,13 @@ CONFIG_CRYPTO=y
#
# CONFIG_CRYPTO_HMAC is not set
# CONFIG_CRYPTO_XCBC is not set
+# CONFIG_CRYPTO_VMAC is not set
#
# Digest
#
# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_GHASH is not set
# CONFIG_CRYPTO_MD4 is not set
# CONFIG_CRYPTO_MD5 is not set
# CONFIG_CRYPTO_MICHAEL_MIC is not set
diff --git a/arch/microblaze/include/asm/asm-compat.h b/arch/microblaze/include/asm/asm-compat.h
new file mode 100644
index 00000000000..e7bc9dc11b5
--- /dev/null
+++ b/arch/microblaze/include/asm/asm-compat.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_MICROBLAZE_ASM_COMPAT_H
+#define _ASM_MICROBLAZE_ASM_COMPAT_H
+
+#include <asm/types.h>
+
+#ifdef __ASSEMBLY__
+# define stringify_in_c(...) __VA_ARGS__
+# define ASM_CONST(x) x
+#else
+/* This version of stringify will deal with commas... */
+# define __stringify_in_c(...) #__VA_ARGS__
+# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+# define __ASM_CONST(x) x##UL
+# define ASM_CONST(x) __ASM_CONST(x)
+#endif
+
+#endif /* _ASM_MICROBLAZE_ASM_COMPAT_H */
diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index 7c3ec13b44d..fc9997b73c0 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -210,6 +210,9 @@ static inline void __iomem *__ioremap(phys_addr_t address, unsigned long size,
#define in_be32(a) __raw_readl((const void __iomem __force *)(a))
#define in_be16(a) __raw_readw(a)
+#define writel_be(v, a) out_be32((__force unsigned *)a, v)
+#define readl_be(a) in_be32((__force unsigned *)a)
+
/*
* Little endian
*/
diff --git a/arch/microblaze/include/asm/ipc.h b/arch/microblaze/include/asm/ipc.h
deleted file mode 100644
index a46e3d9c2a3..00000000000
--- a/arch/microblaze/include/asm/ipc.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipc.h>
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 72aceae8868..880c988c223 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -17,6 +17,7 @@
#include <linux/pfn.h>
#include <asm/setup.h>
+#include <asm/asm-compat.h>
#include <linux/const.h>
#ifdef __KERNEL__
@@ -26,6 +27,8 @@
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
+#define LOAD_OFFSET ASM_CONST((CONFIG_KERNEL_START-CONFIG_KERNEL_BASE_ADDR))
+
#ifndef __ASSEMBLY__
#define PAGE_UP(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1)))
diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h
index 27f8dafd8c3..ed67c9ed15b 100644
--- a/arch/microblaze/include/asm/setup.h
+++ b/arch/microblaze/include/asm/setup.h
@@ -38,7 +38,7 @@ extern void early_console_reg_tlb_alloc(unsigned int addr);
void time_init(void);
void init_IRQ(void);
void machine_early_init(const char *cmdline, unsigned int ram,
- unsigned int fdt);
+ unsigned int fdt, unsigned int msr);
void machine_restart(char *cmd);
void machine_shutdown(void);
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
new file mode 100644
index 00000000000..048dfcd8d89
--- /dev/null
+++ b/arch/microblaze/include/asm/syscall.h
@@ -0,0 +1,99 @@
+#ifndef __ASM_MICROBLAZE_SYSCALL_H
+#define __ASM_MICROBLAZE_SYSCALL_H
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+
+/*
+ * Return the system call number, read from the saved r12 register
+ * (user space supplies the number in R12). @task is unused.
+ */
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	long nr = regs->r12;
+
+	return nr;
+}
+
+/*
+ * Placeholder: currently does nothing with @task or @regs.
+ */
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	/* TODO. */
+}
+
+/*
+ * Return the saved r3 value when IS_ERR_VALUE() classifies it as an
+ * error value, and 0 otherwise. @task is unused.
+ */
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	if (IS_ERR_VALUE(regs->r3))
+		return regs->r3;
+
+	return 0;
+}
+
+/*
+ * Return the system call result as stored in the saved r3 register.
+ * @task is unused.
+ */
+static inline long syscall_get_return_value(struct task_struct *task,
+					     struct pt_regs *regs)
+{
+	long ret = regs->r3;
+
+	return ret;
+}
+
+/*
+ * Store the system call result in the saved r3 register: the negated
+ * @error when @error is non-zero, otherwise @val. @task is unused.
+ */
+static inline void syscall_set_return_value(struct task_struct *task,
+					     struct pt_regs *regs,
+					     int error, long val)
+{
+	regs->r3 = error ? -error : val;
+}
+
+/*
+ * Fetch system call argument @n from the saved registers.
+ * Arguments 0..5 are held in r5..r10 respectively; any other index
+ * triggers BUG(). The ~0 result is only reached for such an index,
+ * should BUG() return.
+ */
+static inline microblaze_reg_t microblaze_get_syscall_arg(struct pt_regs *regs,
+							  unsigned int n)
+{
+	microblaze_reg_t arg = ~0;
+
+	switch (n) {
+	case 0:
+		arg = regs->r5;
+		break;
+	case 1:
+		arg = regs->r6;
+		break;
+	case 2:
+		arg = regs->r7;
+		break;
+	case 3:
+		arg = regs->r8;
+		break;
+	case 4:
+		arg = regs->r9;
+		break;
+	case 5:
+		arg = regs->r10;
+		break;
+	default:
+		BUG();
+	}
+
+	return arg;
+}
+
+/*
+ * Store @val as system call argument @n in the saved registers.
+ * Arguments 0..5 live in r5..r10 respectively, mirroring
+ * microblaze_get_syscall_arg(); any other index triggers BUG().
+ *
+ * Each case must end in a break: without it a valid index would also
+ * overwrite every lower-numbered argument register and then fall
+ * through into the BUG() in the default case.
+ */
+static inline void microblaze_set_syscall_arg(struct pt_regs *regs,
+					       unsigned int n,
+					       unsigned long val)
+{
+	switch (n) {
+	case 5:
+		regs->r10 = val;
+		break;
+	case 4:
+		regs->r9 = val;
+		break;
+	case 3:
+		regs->r8 = val;
+		break;
+	case 2:
+		regs->r7 = val;
+		break;
+	case 1:
+		regs->r6 = val;
+		break;
+	case 0:
+		regs->r5 = val;
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Copy @n consecutive system call arguments, starting with argument
+ * index @i, from the saved registers into @args[0..@n-1].
+ * @task is unused.
+ */
+static inline void syscall_get_arguments(struct task_struct *task,
+					  struct pt_regs *regs,
+					  unsigned int i, unsigned int n,
+					  unsigned long *args)
+{
+	unsigned int k;
+
+	for (k = 0; k < n; k++)
+		args[k] = microblaze_get_syscall_arg(regs, i + k);
+}
+
+/*
+ * Write @n consecutive system call arguments, starting with argument
+ * index @i, into the saved registers from @args[0..@n-1].
+ * @task is unused.
+ */
+static inline void syscall_set_arguments(struct task_struct *task,
+					  struct pt_regs *regs,
+					  unsigned int i, unsigned int n,
+					  const unsigned long *args)
+{
+	unsigned int k;
+
+	for (k = 0; k < n; k++)
+		microblaze_set_syscall_arg(regs, i + k, args[k]);
+}
+
+#endif /* __ASM_MICROBLAZE_SYSCALL_H */
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c
index c411c6757de..3539babc1c1 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo.c
@@ -28,6 +28,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
{"7.10.d", 0x0b},
{"7.20.a", 0x0c},
{"7.20.b", 0x0d},
+ {"7.20.c", 0x0e},
/* FIXME There is no keycode defined in MBV for these versions */
{"2.10.a", 0x10},
{"3.00.a", 0x20},
@@ -49,6 +50,8 @@ const struct family_string_key family_string_lookup[] = {
{"spartan3a", 0xa},
{"spartan3an", 0xb},
{"spartan3adsp", 0xc},
+ {"spartan6", 0xd},
+ {"virtex6", 0xe},
/* FIXME There is no key code defined for spartan2 */
{"spartan2", 0xf0},
{NULL, 0},
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index c7353e79f4a..acc1f05d1e2 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -308,38 +308,69 @@ C_ENTRY(_user_exception):
swi r12, r1, PTO+PT_R0;
tovirt(r1,r1)
- la r15, r0, ret_from_trap-8
/* where the trap should return need -8 to adjust for rtsd r15, 8*/
/* Jump to the appropriate function for the system call number in r12
* (r12 is not preserved), or return an error if r12 is not valid. The LP
* register should point to the location where
* the called function should return. [note that MAKE_SYS_CALL uses label 1] */
- /* See if the system call number is valid. */
+
+ # Step into virtual mode.
+ set_vms;
+ addik r11, r0, 3f
+ rtid r11, 0
+ nop
+3:
+ add r11, r0, CURRENT_TASK /* Get current task ptr into r11 */
+ lwi r11, r11, TS_THREAD_INFO /* get thread info */
+ lwi r11, r11, TI_FLAGS /* get flags in thread info */
+ andi r11, r11, _TIF_WORK_SYSCALL_MASK
+ beqi r11, 4f
+
+ addik r3, r0, -ENOSYS
+ swi r3, r1, PTO + PT_R3
+ brlid r15, do_syscall_trace_enter
+ addik r5, r1, PTO + PT_R0
+
+ # do_syscall_trace_enter returns the new syscall nr.
+ addk r12, r0, r3
+ lwi r5, r1, PTO+PT_R5;
+ lwi r6, r1, PTO+PT_R6;
+ lwi r7, r1, PTO+PT_R7;
+ lwi r8, r1, PTO+PT_R8;
+ lwi r9, r1, PTO+PT_R9;
+ lwi r10, r1, PTO+PT_R10;
+4:
+/* Jump to the appropriate function for the system call number in r12
+ * (r12 is not preserved), or return an error if r12 is not valid.
+ * The LP register should point to the location where the called function
+ * should return. [note that MAKE_SYS_CALL uses label 1] */
+ /* See if the system call number is valid */
addi r11, r12, -__NR_syscalls;
- bgei r11,1f;
+ bgei r11,5f;
/* Figure out which function to use for this system call. */
/* Note Microblaze barrel shift is optional, so don't rely on it */
add r12, r12, r12; /* convert num -> ptr */
add r12, r12, r12;
/* Trac syscalls and stored them to r0_ram */
- lwi r3, r12, 0x400 + TOPHYS(r0_ram)
+ lwi r3, r12, 0x400 + r0_ram
addi r3, r3, 1
- swi r3, r12, 0x400 + TOPHYS(r0_ram)
+ swi r3, r12, 0x400 + r0_ram
+
+ # Find and jump into the syscall handler.
+ lwi r12, r12, sys_call_table
+ /* where the trap should return need -8 to adjust for rtsd r15, 8 */
+ la r15, r0, ret_from_trap-8
+ bra r12
- lwi r12, r12, TOPHYS(sys_call_table); /* Function ptr */
- /* Make the system call. to r12*/
- set_vms;
- rtid r12, 0;
- nop;
/* The syscall number is invalid, return an error. */
-1: VM_ON; /* RETURN() expects virtual mode*/
+5:
addi r3, r0, -ENOSYS;
rtsd r15,8; /* looks like a normal subroutine return */
or r0, r0, r0
-/* Entry point used to return from a syscall/trap. */
+/* Entry point used to return from a syscall/trap */
/* We re-enable BIP bit before state restore */
C_ENTRY(ret_from_trap):
set_bip; /* Ints masked for state restore*/
@@ -349,6 +380,23 @@ C_ENTRY(ret_from_trap):
/* We're returning to user mode, so check for various conditions that
* trigger rescheduling. */
+ # FIXME: Restructure all these flag checks.
+ add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+ lwi r11, r11, TS_THREAD_INFO; /* get thread info */
+ lwi r11, r11, TI_FLAGS; /* get flags in thread info */
+ andi r11, r11, _TIF_WORK_SYSCALL_MASK
+ beqi r11, 1f
+
+ swi r3, r1, PTO + PT_R3
+ swi r4, r1, PTO + PT_R4
+ brlid r15, do_syscall_trace_leave
+ addik r5, r1, PTO + PT_R0
+ lwi r3, r1, PTO + PT_R3
+ lwi r4, r1, PTO + PT_R4
+1:
+
+ /* We're returning to user mode, so check for various conditions that
+ * trigger rescheduling. */
/* Get current task ptr into r11 */
add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
lwi r11, r11, TS_THREAD_INFO; /* get thread info */
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index 0cb64a31e89..d9f70f83097 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -72,7 +72,8 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
#endif
#if 0
- printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x ESR=%08x\n",
+ printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x " \
+ "ESR=%08x\n",
type, user_mode(regs) ? "user" : "kernel", fsr,
(unsigned int) regs->pc, (unsigned int) regs->esr);
#endif
@@ -80,42 +81,50 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
switch (type & 0x1F) {
case MICROBLAZE_ILL_OPCODE_EXCEPTION:
if (user_mode(regs)) {
- printk(KERN_WARNING "Illegal opcode exception in user mode.\n");
+ pr_debug("Illegal opcode exception " \
+ "in user mode.\n");
_exception(SIGILL, regs, ILL_ILLOPC, addr);
return;
}
- printk(KERN_WARNING "Illegal opcode exception in kernel mode.\n");
+ printk(KERN_WARNING "Illegal opcode exception " \
+ "in kernel mode.\n");
die("opcode exception", regs, SIGBUS);
break;
case MICROBLAZE_IBUS_EXCEPTION:
if (user_mode(regs)) {
- printk(KERN_WARNING "Instruction bus error exception in user mode.\n");
+ pr_debug("Instruction bus error " \
+ "exception in user mode.\n");
_exception(SIGBUS, regs, BUS_ADRERR, addr);
return;
}
- printk(KERN_WARNING "Instruction bus error exception in kernel mode.\n");
+ printk(KERN_WARNING "Instruction bus error exception " \
+ "in kernel mode.\n");
die("bus exception", regs, SIGBUS);
break;
case MICROBLAZE_DBUS_EXCEPTION:
if (user_mode(regs)) {
- printk(KERN_WARNING "Data bus error exception in user mode.\n");
+ pr_debug("Data bus error exception " \
+ "in user mode.\n");
_exception(SIGBUS, regs, BUS_ADRERR, addr);
return;
}
- printk(KERN_WARNING "Data bus error exception in kernel mode.\n");
+ printk(KERN_WARNING "Data bus error exception " \
+ "in kernel mode.\n");
die("bus exception", regs, SIGBUS);
break;
case MICROBLAZE_DIV_ZERO_EXCEPTION:
if (user_mode(regs)) {
- printk(KERN_WARNING "Divide by zero exception in user mode\n");
- _exception(SIGILL, regs, ILL_ILLOPC, addr);
+ pr_debug("Divide by zero exception " \
+ "in user mode\n");
+ _exception(SIGFPE, regs, FPE_INTDIV, addr);
return;
}
- printk(KERN_WARNING "Divide by zero exception in kernel mode.\n");
+ printk(KERN_WARNING "Divide by zero exception " \
+ "in kernel mode.\n");
die("Divide by exception", regs, SIGBUS);
break;
case MICROBLAZE_FPU_EXCEPTION:
- printk(KERN_WARNING "FPU exception\n");
+ pr_debug("FPU exception\n");
/* IEEE FP exception */
/* I removed fsr variable and use code var for storing fsr */
if (fsr & FSR_IO)
@@ -133,7 +142,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
#ifdef CONFIG_MMU
case MICROBLAZE_PRIVILEGED_EXCEPTION:
- printk(KERN_WARNING "Privileged exception\n");
+ pr_debug("Privileged exception\n");
/* "brk r0,r0" - used as debug breakpoint */
if (get_user(code, (unsigned long *)regs->pc) == 0
&& code == 0x980c0000) {
diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index e41c6ce2a7b..697ce3007f3 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -54,6 +54,16 @@ ENTRY(_start)
mfs r1, rmsr
andi r1, r1, ~2
mts rmsr, r1
+/*
+ * Check whether this MicroBlaze implements the MSR instructions:
+ * set the carry bit, try to clear it with msrclr, then read the carry
+ * bit back into r8. If it was cleared, the MSR instructions work.
+ */
+ /* r8=0 - I have msr instr, 1 - I don't have them */
+ rsubi r0, r0, 1 /* set the carry bit */
+ msrclr r0, 0x4 /* try to clear it */
+ /* addic adds the carry in, so r8 is '0' only if msrclr cleared it */
+ addic r8, r0, 0
/* r7 may point to an FDT, or there may be one linked in.
if it's in r7, we've got to save it away ASAP.
@@ -209,8 +219,8 @@ start_here:
* Please see $(ARCH)/mach-$(SUBARCH)/setup.c for
* the function.
*/
- la r8, r0, machine_early_init
- brald r15, r8
+ la r9, r0, machine_early_init
+ brald r15, r9
nop
#ifndef CONFIG_MMU
diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S
index 3288c973767..6b0288ebccd 100644
--- a/arch/microblaze/kernel/hw_exception_handler.S
+++ b/arch/microblaze/kernel/hw_exception_handler.S
@@ -84,9 +84,10 @@
#define NUM_TO_REG(num) r ## num
#ifdef CONFIG_MMU
-/* FIXME you can't change first load of MSR because there is
- * hardcoded jump bri 4 */
#define RESTORE_STATE \
+ lwi r5, r1, 0; \
+ mts rmsr, r5; \
+ nop; \
lwi r3, r1, PT_R3; \
lwi r4, r1, PT_R4; \
lwi r5, r1, PT_R5; \
@@ -309,6 +310,9 @@ _hw_exception_handler:
lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */
#endif
+ mfs r5, rmsr;
+ nop
+ swi r5, r1, 0;
mfs r3, resr
nop
mfs r4, rear;
@@ -380,6 +384,8 @@ handle_other_ex: /* Handle Other exceptions here */
addk r8, r17, r0; /* Load exception address */
bralid r15, full_exception; /* Branch to the handler */
nop;
+ mts rfsr, r0; /* Clear sticky fsr */
+ nop
/*
* Trigger execution of the signal handler by enabling
diff --git a/arch/microblaze/kernel/init_task.c b/arch/microblaze/kernel/init_task.c
index 67da22579b6..b5d711f94ff 100644
--- a/arch/microblaze/kernel/init_task.c
+++ b/arch/microblaze/kernel/init_task.c
@@ -19,9 +19,8 @@
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
-{ INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
struct task_struct init_task = INIT_TASK(init_task);
EXPORT_SYMBOL(init_task);
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 00b12c6d532..4201c743cc9 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -235,6 +235,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
regs->pc = pc;
regs->r1 = usp;
regs->pt_mode = 0;
+ regs->msr |= MSR_UMS;
}
#ifdef CONFIG_MMU
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index 53ff39af6a5..4b3ac32754d 100644
--- a/arch/microblaze/kernel/ptrace.c
+++ b/arch/microblaze/kernel/ptrace.c
@@ -29,6 +29,10 @@
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
+#include <linux/elf.h>
+#include <linux/audit.h>
+#include <linux/seccomp.h>
+#include <linux/tracehook.h>
#include <linux/errno.h>
#include <asm/processor.h>
@@ -174,6 +178,64 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
return rval;
}
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
+{
+ long ret = 0;
+
+ secure_computing(regs->r12);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE) &&
+ tracehook_report_syscall_entry(regs))
+ /*
+ * Tracing decided this syscall should not happen.
+ * We'll return a bogus call number to get an ENOSYS
+ * error, but leave the original number in regs->r12.
+ */
+ ret = -1L;
+
+ if (unlikely(current->audit_context))
+ audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12,
+ regs->r5, regs->r6,
+ regs->r7, regs->r8);
+
+ return ret ?: regs->r12;
+}
+
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
+{
+ int step;
+
+ if (unlikely(current->audit_context))
+ audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3);
+
+ step = test_thread_flag(TIF_SINGLESTEP);
+ if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, step);
+}
+
+#if 0
+static asmlinkage void syscall_trace(void)
+{
+ if (!test_thread_flag(TIF_SYSCALL_TRACE))
+ return;
+ if (!(current->ptrace & PT_PTRACED))
+ return;
+ /* The 0x80 provides a way for the tracing parent to distinguish
+ between a syscall stop and SIGTRAP delivery */
+ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+ ? 0x80 : 0));
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+ * for normal use. strace only continues with a signal if the
+ * stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+}
+#endif
+
void ptrace_disable(struct task_struct *child)
{
/* nothing to do */
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 2a97bf513b6..8c1e0f4dcf1 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -94,7 +94,7 @@ inline unsigned get_romfs_len(unsigned *addr)
#endif /* CONFIG_MTD_UCLINUX_EBSS */
void __init machine_early_init(const char *cmdline, unsigned int ram,
- unsigned int fdt)
+ unsigned int fdt, unsigned int msr)
{
unsigned long *src, *dst = (unsigned long *)0x0;
@@ -157,6 +157,16 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
early_printk("New klimit: 0x%08x\n", (unsigned)klimit);
#endif
+#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
+ if (msr)
+ early_printk("!!!Your kernel has setup MSR instruction but "
+ "CPU don't have it %d\n", msr);
+#else
+ if (!msr)
+ early_printk("!!!Your kernel not setup MSR instruction but "
+ "CPU have it %d\n", msr);
+#endif
+
for (src = __ivt_start; src < __ivt_end; src++, dst++)
*dst = *src;
diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c
index b96f1682bb2..07cabed4b94 100644
--- a/arch/microblaze/kernel/sys_microblaze.c
+++ b/arch/microblaze/kernel/sys_microblaze.c
@@ -23,7 +23,6 @@
#include <linux/mman.h>
#include <linux/sys.h>
#include <linux/ipc.h>
-#include <linux/utsname.h>
#include <linux/file.h>
#include <linux/module.h>
#include <linux/err.h>
diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S
index ec5fa91a48d..e704188d785 100644
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S
@@ -12,13 +12,16 @@ OUTPUT_FORMAT("elf32-microblaze", "elf32-microblaze", "elf32-microblaze")
OUTPUT_ARCH(microblaze)
ENTRY(_start)
+#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
+#include <asm/thread_info.h>
jiffies = jiffies_64 + 4;
SECTIONS {
. = CONFIG_KERNEL_START;
- .text : {
+ _start = CONFIG_KERNEL_BASE_ADDR;
+ .text : AT(ADDR(.text) - LOAD_OFFSET) {
_text = . ;
_stext = . ;
*(.text .text.*)
@@ -33,24 +36,22 @@ SECTIONS {
}
. = ALIGN (4) ;
- _fdt_start = . ; /* place for fdt blob */
- . = . + 0x4000;
- _fdt_end = . ;
+ __fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET) {
+ _fdt_start = . ; /* place for fdt blob */
+ *(__fdt_blob) ; /* Any link-placed DTB */
+ . = _fdt_start + 0x4000; /* Pad up to 16kbyte */
+ _fdt_end = . ;
+ }
. = ALIGN(16);
RODATA
- . = ALIGN(16);
- __ex_table : {
- __start___ex_table = .;
- *(__ex_table)
- __stop___ex_table = .;
- }
+ EXCEPTION_TABLE(16)
/*
* sdata2 section can go anywhere, but must be word aligned
* and SDA2_BASE must point to the middle of it
*/
- .sdata2 : {
+ .sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) {
_ssrw = .;
. = ALIGN(4096); /* page aligned when MMU used - origin 0x8 */
*(.sdata2)
@@ -61,12 +62,7 @@ SECTIONS {
}
_sdata = . ;
- .data ALIGN (4096) : { /* page aligned when MMU used - origin 0x4 */
- DATA_DATA
- CONSTRUCTORS
- }
- . = ALIGN(32);
- .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+ RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
_edata = . ;
/* Reserve some low RAM for r0 based memory references */
@@ -74,18 +70,14 @@ SECTIONS {
r0_ram = . ;
. = . + 4096; /* a page should be enough */
- /* The initial task */
- . = ALIGN(8192);
- .data.init_task : { *(.data.init_task) }
-
/* Under the microblaze ABI, .sdata and .sbss must be contiguous */
. = ALIGN(8);
- .sdata : {
+ .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) {
_ssro = .;
*(.sdata)
}
- .sbss : {
+ .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) {
_ssbss = .;
*(.sbss)
_esbss = .;
@@ -96,47 +88,36 @@ SECTIONS {
__init_begin = .;
- . = ALIGN(4096);
- .init.text : {
- _sinittext = . ;
- INIT_TEXT
- _einittext = .;
- }
+ INIT_TEXT_SECTION(PAGE_SIZE)
- .init.data : {
+ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
INIT_DATA
}
. = ALIGN(4);
- .init.ivt : {
+ .init.ivt : AT(ADDR(.init.ivt) - LOAD_OFFSET) {
__ivt_start = .;
*(.init.ivt)
__ivt_end = .;
}
- .init.setup : {
- __setup_start = .;
- *(.init.setup)
- __setup_end = .;
+ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+ INIT_SETUP(0)
}
- .initcall.init : {
- __initcall_start = .;
- INITCALLS
- __initcall_end = .;
+ .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET ) {
+ INIT_CALLS
}
- .con_initcall.init : {
- __con_initcall_start = .;
- *(.con_initcall.init)
- __con_initcall_end = .;
+ .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+ CON_INITCALL
}
SECURITY_INIT
__init_end_before_initramfs = .;
- .init.ramfs ALIGN(4096) : {
+ .init.ramfs ALIGN(4096) : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
__initramfs_start = .;
*(.init.ramfs)
__initramfs_end = .;
@@ -152,7 +133,8 @@ SECTIONS {
}
__init_end = .;
- .bss ALIGN (4096) : { /* page aligned when MMU used */
+ .bss ALIGN (4096) : AT(ADDR(.bss) - LOAD_OFFSET) {
+ /* page aligned when MMU used */
__bss_start = . ;
*(.bss*)
*(COMMON)
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 1110784eb3f..a44892e7cd5 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -180,7 +180,8 @@ void free_initrd_mem(unsigned long start, unsigned long end)
totalram_pages++;
pages++;
}
- printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n", pages);
+ printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n",
+ (int)(pages * (PAGE_SIZE / 1024)));
}
#endif
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index c825b14b4ed..77f5021218d 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -627,16 +627,6 @@ endif
cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic
drivers-$(CONFIG_PCI) += arch/mips/pci/
-ifdef CONFIG_32BIT
-ifdef CONFIG_CPU_LITTLE_ENDIAN
-JIFFIES = jiffies_64
-else
-JIFFIES = jiffies_64 + 4
-endif
-else
-JIFFIES = jiffies_64
-endif
-
#
# Automatically detect the build format. By default we choose
# the elf format according to the load address.
@@ -660,8 +650,9 @@ ifdef CONFIG_64BIT
endif
KBUILD_AFLAGS += $(cflags-y)
-KBUILD_CFLAGS += $(cflags-y) \
- -D"VMLINUX_LOAD_ADDRESS=$(load-y)"
+KBUILD_CFLAGS += $(cflags-y)
+KBUILD_CPPFLAGS += -D"VMLINUX_LOAD_ADDRESS=$(load-y)"
+KBUILD_CPPFLAGS += -D"DATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)"
LDFLAGS += -m $(ld-emul)
@@ -676,18 +667,6 @@ endif
OBJCOPYFLAGS += --remove-section=.reginfo
-#
-# Choosing incompatible machines durings configuration will result in
-# error messages during linking. Select a default linkscript if
-# none has been choosen above.
-#
-
-CPPFLAGS_vmlinux.lds := \
- $(KBUILD_CFLAGS) \
- -D"LOADADDR=$(load-y)" \
- -D"JIFFIES=$(JIFFIES)" \
- -D"DATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)"
-
head-y := arch/mips/kernel/head.o arch/mips/kernel/init_task.o
libs-y += arch/mips/lib/
diff --git a/arch/mips/alchemy/common/time.c b/arch/mips/alchemy/common/time.c
index f34ff860194..379a664809b 100644
--- a/arch/mips/alchemy/common/time.c
+++ b/arch/mips/alchemy/common/time.c
@@ -88,7 +88,7 @@ static struct clock_event_device au1x_rtcmatch2_clockdev = {
.irq = AU1000_RTC_MATCH2_INT,
.set_next_event = au1x_rtcmatch2_set_next_event,
.set_mode = au1x_rtcmatch2_set_mode,
- .cpumask = CPU_MASK_ALL_PTR,
+ .cpumask = cpu_all_mask,
};
static struct irqaction au1x_rtcmatch2_irqaction = {
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 23059170700..f6837422fe6 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -24,12 +24,10 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
#define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid)
#define parent_node(node) (node)
-#define node_to_cpumask(node) (hub_data(node)->h_cpus)
#define cpumask_of_node(node) (&hub_data(node)->h_cpus)
struct pci_bus;
extern int pcibus_to_node(struct pci_bus *);
-#define pcibus_to_cpumask(bus) (cpu_online_map)
#define cpumask_of_pcibus(bus) (cpu_online_mask)
extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index d3bea88d874..d9743536a62 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -178,8 +178,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* Mark current->active_mm as not "active" anymore.
* We don't want to mislead possible IPI tlb flush routines.
*/
- cpu_clear(cpu, prev->cpu_vm_mask);
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ cpumask_set_cpu(cpu, mm_cpumask(next));
local_irq_restore(flags);
}
@@ -235,8 +235,8 @@ activate_mm(struct mm_struct *prev, struct mm_struct *next)
TLBMISS_HANDLER_SETUP_PGD(next->pgd);
/* mark mmu ownership change */
- cpu_clear(cpu, prev->cpu_vm_mask);
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ cpumask_set_cpu(cpu, mm_cpumask(next));
local_irq_restore(flags);
}
@@ -258,7 +258,7 @@ drop_mmu_context(struct mm_struct *mm, unsigned cpu)
local_irq_save(flags);
- if (cpu_isset(cpu, mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
get_new_mmu_context(mm, cpu);
#ifdef CONFIG_MIPS_MT_SMTC
/* See comments for similar code above */
diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h
index fd545547b8a..9e09af34c8a 100644
--- a/arch/mips/include/asm/smp-ops.h
+++ b/arch/mips/include/asm/smp-ops.h
@@ -19,7 +19,7 @@ struct task_struct;
struct plat_smp_ops {
void (*send_ipi_single)(int cpu, unsigned int action);
- void (*send_ipi_mask)(cpumask_t mask, unsigned int action);
+ void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action);
void (*init_secondary)(void);
void (*smp_finish)(void);
void (*cpus_done)(void);
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index aaa2d4ab26d..e15f11a0931 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -78,6 +78,6 @@ extern void play_dead(void);
extern asmlinkage void smp_call_function_interrupt(void);
extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#endif /* __ASM_SMP_H */
diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c
index 5b457a40c78..6d6ca530589 100644
--- a/arch/mips/kernel/init_task.c
+++ b/arch/mips/kernel/init_task.c
@@ -21,9 +21,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
*
* The things we do for performance..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"),
- __aligned__(THREAD_SIZE))) =
+union thread_union init_thread_union __init_task_data
+ __attribute__((__aligned__(THREAD_SIZE))) =
{ INIT_THREAD_INFO(init_task) };
/*
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index ad0ff5dc4d5..cc81771b882 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -80,11 +80,11 @@ void cmp_send_ipi_single(int cpu, unsigned int action)
local_irq_restore(flags);
}
-static void cmp_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void cmp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
cmp_send_ipi_single(i, action);
}
@@ -171,7 +171,7 @@ void __init cmp_smp_setup(void)
for (i = 1; i < NR_CPUS; i++) {
if (amon_cpu_avail(i)) {
- cpu_set(i, cpu_possible_map);
+ set_cpu_possible(i, true);
__cpu_number_map[i] = ++ncpu;
__cpu_logical_map[ncpu] = i;
}
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 6f7ee5ac46e..43e7cdc5ded 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -70,7 +70,7 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0,
write_vpe_c0_vpeconf0(tmp);
/* Record this as available CPU */
- cpu_set(tc, cpu_possible_map);
+ set_cpu_possible(tc, true);
__cpu_number_map[tc] = ++ncpu;
__cpu_logical_map[ncpu] = tc;
}
@@ -141,11 +141,11 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action)
local_irq_restore(flags);
}
-static void vsmp_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void vsmp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
vsmp_send_ipi_single(i, action);
}
diff --git a/arch/mips/kernel/smp-up.c b/arch/mips/kernel/smp-up.c
index 2508d55d68f..00500fea275 100644
--- a/arch/mips/kernel/smp-up.c
+++ b/arch/mips/kernel/smp-up.c
@@ -18,7 +18,8 @@ static void up_send_ipi_single(int cpu, unsigned int action)
panic(KERN_ERR "%s called", __func__);
}
-static inline void up_send_ipi_mask(cpumask_t mask, unsigned int action)
+static inline void up_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action)
{
panic(KERN_ERR "%s called", __func__);
}
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 64668a93248..4eb106c6a3e 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -128,7 +128,7 @@ asmlinkage __cpuinit void start_secondary(void)
cpu_idle();
}
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
mp_ops->send_ipi_mask(mask, SMP_CALL_FUNCTION);
}
@@ -183,15 +183,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
mp_ops->prepare_cpus(max_cpus);
set_cpu_sibling_map(0);
#ifndef CONFIG_HOTPLUG_CPU
- cpu_present_map = cpu_possible_map;
+ init_cpu_present(&cpu_possible_map);
#endif
}
/* preload SMP state for boot cpu */
void __devinit smp_prepare_boot_cpu(void)
{
- cpu_set(0, cpu_possible_map);
- cpu_set(0, cpu_online_map);
+ set_cpu_possible(0, true);
+ set_cpu_online(0, true);
cpu_set(0, cpu_callin_map);
}
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 1a466baf0ed..67153a0dc26 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -305,7 +305,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot)
*/
ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) {
- cpu_set(i, cpu_possible_map);
+ set_cpu_possible(i, true);
__cpu_number_map[i] = i;
__cpu_logical_map[i] = i;
}
@@ -525,8 +525,8 @@ void smtc_prepare_cpus(int cpus)
* Pull any physically present but unused TCs out of circulation.
*/
while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) {
- cpu_clear(tc, cpu_possible_map);
- cpu_clear(tc, cpu_present_map);
+ set_cpu_possible(tc, false);
+ set_cpu_present(tc, false);
tc++;
}
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 2769bed3d2a..9bf0e3df7c5 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -10,7 +10,16 @@ PHDRS {
text PT_LOAD FLAGS(7); /* RWX */
note PT_NOTE FLAGS(4); /* R__ */
}
-jiffies = JIFFIES;
+
+#ifdef CONFIG_32BIT
+ #ifdef CONFIG_CPU_LITTLE_ENDIAN
+ jiffies = jiffies_64;
+ #else
+ jiffies = jiffies_64 + 4;
+ #endif
+#else
+ jiffies = jiffies_64;
+#endif
SECTIONS
{
@@ -29,7 +38,7 @@ SECTIONS
/* . = 0xa800000000300000; */
. = 0xffffffff80300000;
#endif
- . = LOADADDR;
+ . = VMLINUX_LOAD_ADDRESS;
/* read-only */
_text = .; /* Text and read-only data */
.text : {
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c
index 3f04d4c406b..b3deed8db61 100644
--- a/arch/mips/lasat/sysctl.c
+++ b/arch/mips/lasat/sysctl.c
@@ -56,12 +56,12 @@ int sysctl_lasatstring(ctl_table *table,
/* And the same for proc */
-int proc_dolasatstring(ctl_table *table, int write, struct file *filp,
+int proc_dolasatstring(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int r;
- r = proc_dostring(table, write, filp, buffer, lenp, ppos);
+ r = proc_dostring(table, write, buffer, lenp, ppos);
if ((!write) || r)
return r;
@@ -71,12 +71,12 @@ int proc_dolasatstring(ctl_table *table, int write, struct file *filp,
}
/* proc function to write EEPROM after changing int entry */
-int proc_dolasatint(ctl_table *table, int write, struct file *filp,
+int proc_dolasatint(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int r;
- r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ r = proc_dointvec(table, write, buffer, lenp, ppos);
if ((!write) || r)
return r;
@@ -89,7 +89,7 @@ int proc_dolasatint(ctl_table *table, int write, struct file *filp,
static int rtctmp;
/* proc function to read/write RealTime Clock */
-int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
+int proc_dolasatrtc(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct timespec ts;
@@ -102,7 +102,7 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
if (rtctmp < 0)
rtctmp = 0;
}
- r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ r = proc_dointvec(table, write, buffer, lenp, ppos);
if (r)
return r;
@@ -154,7 +154,7 @@ int sysctl_lasat_rtc(ctl_table *table,
#endif
#ifdef CONFIG_INET
-int proc_lasat_ip(ctl_table *table, int write, struct file *filp,
+int proc_lasat_ip(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
unsigned int ip;
@@ -231,12 +231,12 @@ static int sysctl_lasat_prid(ctl_table *table,
return 0;
}
-int proc_lasat_prid(ctl_table *table, int write, struct file *filp,
+int proc_lasat_prid(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int r;
- r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ r = proc_dointvec(table, write, buffer, lenp, ppos);
if (r < 0)
return r;
if (write) {
diff --git a/arch/mips/mipssim/sim_smtc.c b/arch/mips/mipssim/sim_smtc.c
index d6e4f656ad1..5da30b6a65b 100644
--- a/arch/mips/mipssim/sim_smtc.c
+++ b/arch/mips/mipssim/sim_smtc.c
@@ -43,11 +43,12 @@ static void ssmtc_send_ipi_single(int cpu, unsigned int action)
/* "CPU" may be TC of same VPE, VPE of same CPU, or different CPU */
}
-static inline void ssmtc_send_ipi_mask(cpumask_t mask, unsigned int action)
+static inline void ssmtc_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
ssmtc_send_ipi_single(i, action);
}
diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index 10ab69f7183..94e05e5733c 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@@ -79,7 +79,7 @@ static void octeon_flush_icache_all_cores(struct vm_area_struct *vma)
* cores it has been used on
*/
if (vma)
- mask = vma->vm_mm->cpu_vm_mask;
+ mask = *mm_cpumask(vma->vm_mm);
else
mask = cpu_online_map;
cpu_clear(cpu, mask);
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 499ffe5475d..192cfd2a539 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -21,11 +21,11 @@ static void msmtc_send_ipi_single(int cpu, unsigned int action)
smtc_send_ipi(cpu, LINUX_SMP_IPI, action);
}
-static void msmtc_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void msmtc_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
msmtc_send_ipi_single(i, action);
}
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index 8ace2771623..326fe7a392e 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -97,11 +97,11 @@ static void yos_send_ipi_single(int cpu, unsigned int action)
}
}
-static void yos_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void yos_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
yos_send_ipi_single(i, action);
}
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 060d853d7b3..f61c164d1e6 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -421,7 +421,7 @@ static void __init node_mem_init(cnodeid_t node)
/*
* A node with nothing. We use it to avoid any special casing in
- * node_to_cpumask
+ * cpumask_of_node
*/
static struct node_data null_node = {
.hub = {
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index cbcd7eb83bd..9aa8f2951df 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -165,11 +165,11 @@ static void ip27_send_ipi_single(int destid, unsigned int action)
REMOTE_HUB_SEND_INTR(COMPACT_TO_NASID_NODEID(cpu_to_node(destid)), irq);
}
-static void ip27_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void ip27_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
ip27_send_ipi_single(i, action);
}
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index 314691648c9..47b347c992e 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -82,11 +82,12 @@ static void bcm1480_send_ipi_single(int cpu, unsigned int action)
__raw_writeq((((u64)action)<< 48), mailbox_0_set_regs[cpu]);
}
-static void bcm1480_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void bcm1480_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
bcm1480_send_ipi_single(i, action);
}
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index cad14003b84..c00a5cb1128 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -70,11 +70,12 @@ static void sb1250_send_ipi_single(int cpu, unsigned int action)
__raw_writeq((((u64)action) << 48), mailbox_set_regs[cpu]);
}
-static inline void sb1250_send_ipi_mask(cpumask_t mask, unsigned int action)
+static inline void sb1250_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
sb1250_send_ipi_single(i, action);
}
diff --git a/arch/mn10300/include/asm/mmu_context.h b/arch/mn10300/include/asm/mmu_context.h
index a9e2e34f69b..cb294c244de 100644
--- a/arch/mn10300/include/asm/mmu_context.h
+++ b/arch/mn10300/include/asm/mmu_context.h
@@ -38,13 +38,13 @@ extern unsigned long mmu_context_cache[NR_CPUS];
#define enter_lazy_tlb(mm, tsk) do {} while (0)
#ifdef CONFIG_SMP
-#define cpu_ran_vm(cpu, task) \
- cpu_set((cpu), (task)->cpu_vm_mask)
-#define cpu_maybe_ran_vm(cpu, task) \
- cpu_test_and_set((cpu), (task)->cpu_vm_mask)
+#define cpu_ran_vm(cpu, mm) \
+ cpumask_set_cpu((cpu), mm_cpumask(mm))
+#define cpu_maybe_ran_vm(cpu, mm) \
+ cpumask_test_and_set_cpu((cpu), mm_cpumask(mm))
#else
-#define cpu_ran_vm(cpu, task) do {} while (0)
-#define cpu_maybe_ran_vm(cpu, task) true
+#define cpu_ran_vm(cpu, mm) do {} while (0)
+#define cpu_maybe_ran_vm(cpu, mm) true
#endif /* CONFIG_SMP */
/*
diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c
index 80d423b80af..a481b043bea 100644
--- a/arch/mn10300/kernel/init_task.c
+++ b/arch/mn10300/kernel/init_task.c
@@ -27,9 +27,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c
index 3e52a105432..8ca5af00334 100644
--- a/arch/mn10300/kernel/sys_mn10300.c
+++ b/arch/mn10300/kernel/sys_mn10300.c
@@ -19,7 +19,6 @@
#include <linux/stat.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/tty.h>
#include <asm/uaccess.h>
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index da6f66901c9..55cca1dac43 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -118,8 +118,8 @@ define archhelp
@echo '* vmlinux - Uncompressed kernel image (./vmlinux)'
@echo ' palo - Bootable image (./lifimage)'
@echo ' install - Install kernel using'
- @echo ' (your) ~/bin/installkernel or'
- @echo ' (distribution) /sbin/installkernel or'
+ @echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ @echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
@echo ' copy to $$(INSTALL_PATH)'
endef
diff --git a/arch/parisc/include/asm/fcntl.h b/arch/parisc/include/asm/fcntl.h
index 1e1c824764e..5f39d5597ce 100644
--- a/arch/parisc/include/asm/fcntl.h
+++ b/arch/parisc/include/asm/fcntl.h
@@ -28,6 +28,8 @@
#define F_SETOWN 12 /* for sockets. */
#define F_SETSIG 13 /* for sockets. */
#define F_GETSIG 14 /* for sockets. */
+#define F_GETOWN_EX 15
+#define F_SETOWN_EX 16
/* for posix fcntl() and lockf() */
#define F_RDLCK 01
diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h
index 21eb45a5262..2e73623feb6 100644
--- a/arch/parisc/include/asm/smp.h
+++ b/arch/parisc/include/asm/smp.h
@@ -30,7 +30,6 @@ extern void smp_send_all_nop(void);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
#endif /* !ASSEMBLY */
diff --git a/arch/parisc/install.sh b/arch/parisc/install.sh
index 9632b3e164c..e593fc8d58b 100644
--- a/arch/parisc/install.sh
+++ b/arch/parisc/install.sh
@@ -21,8 +21,8 @@
# User may have a custom install script
-if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
-if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install
diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c
index 82974b20fc1..d020eae6525 100644
--- a/arch/parisc/kernel/init_task.c
+++ b/arch/parisc/kernel/init_task.c
@@ -43,8 +43,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((aligned(128))) __attribute__((__section__(".data.init_task"))) =
+union thread_union init_thread_union __init_task_data
+ __attribute__((aligned(128))) =
{ INIT_THREAD_INFO(init_task) };
#if PT_NLEVELS == 3
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 92a0acaa0d1..561388b17c9 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -18,7 +18,6 @@
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/times.h>
-#include <linux/utsname.h>
#include <linux/time.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 952a3963e9e..aacf629c1a9 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -158,8 +158,6 @@ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
# Default to zImage, override when needed
all: zImage
-CPPFLAGS_vmlinux.lds := -Upowerpc
-
BOOT_TARGETS = zImage zImage.initrd uImage zImage% dtbImage% treeImage.% cuImage.% simpleImage.%
PHONY += $(BOOT_TARGETS)
@@ -182,8 +180,8 @@ define archhelp
@echo ' simpleImage.<dt> - Firmware independent image.'
@echo ' treeImage.<dt> - Support for older IBM 4xx firmware (not U-Boot)'
@echo ' install - Install kernel using'
- @echo ' (your) ~/bin/installkernel or'
- @echo ' (distribution) /sbin/installkernel or'
+ @echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ @echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
@echo ' install to $$(INSTALL_PATH) and run lilo'
@echo ' *_defconfig - Select default config from arch/$(ARCH)/configs'
@echo ''
diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh
index 98312d169c8..b6a256bc96e 100644
--- a/arch/powerpc/boot/install.sh
+++ b/arch/powerpc/boot/install.sh
@@ -23,8 +23,8 @@ set -e
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install
diff --git a/arch/powerpc/include/asm/fsldma.h b/arch/powerpc/include/asm/fsldma.h
new file mode 100644
index 00000000000..a67aeed17d4
--- /dev/null
+++ b/arch/powerpc/include/asm/fsldma.h
@@ -0,0 +1,136 @@
+/*
+ * Freescale MPC83XX / MPC85XX DMA Controller
+ *
+ * Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ARCH_POWERPC_ASM_FSLDMA_H__
+#define __ARCH_POWERPC_ASM_FSLDMA_H__
+
+#include <linux/dmaengine.h>
+
+/*
+ * Definitions for the Freescale DMA controller's DMA_SLAVE implementation
+ *
+ * The Freescale DMA_SLAVE implementation was designed to handle many-to-many
+ * transfers. An example usage would be an accelerated copy between two
+ * scatterlists. Another example use would be an accelerated copy from
+ * multiple non-contiguous device buffers into a single scatterlist.
+ *
+ * A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This
+ * structure contains a list of hardware addresses that should be copied
+ * to/from the scatterlist passed into device_prep_slave_sg(). The structure
+ * also has some fields to enable hardware-specific features.
+ */
+
+/**
+ * struct fsl_dma_hw_addr
+ * @entry: linked list entry
+ * @address: the hardware address
+ * @length: length to transfer
+ *
+ * Holds a single physical hardware address / length pair for use
+ * with the DMAEngine DMA_SLAVE API.
+ */
+struct fsl_dma_hw_addr {
+ struct list_head entry;
+
+ dma_addr_t address;
+ size_t length;
+};
+
+/**
+ * struct fsl_dma_slave
+ * @addresses: a linked list of struct fsl_dma_hw_addr structures
+ * @request_count: value for DMA request count
+ * @src_loop_size: setup and enable constant source-address DMA transfers
+ * @dst_loop_size: setup and enable constant destination address DMA transfers
+ * @external_start: enable externally started DMA transfers
+ * @external_pause: enable externally paused DMA transfers
+ *
+ * Holds a list of address / length pairs for use with the DMAEngine
+ * DMA_SLAVE API implementation for the Freescale DMA controller.
+ */
+struct fsl_dma_slave {
+
+ /* List of hardware address/length pairs */
+ struct list_head addresses;
+
+ /* Support for extra controller features */
+ unsigned int request_count;
+ unsigned int src_loop_size;
+ unsigned int dst_loop_size;
+ bool external_start;
+ bool external_pause;
+};
+
+/**
+ * fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave
+ * @slave: the &struct fsl_dma_slave to add to
+ * @address: the hardware address to add
+ * @length: the number of bytes to transfer from @address
+ *
+ * Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on
+ * success, -ERRNO otherwise.
+ */
+static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave,
+ dma_addr_t address, size_t length)
+{
+ struct fsl_dma_hw_addr *addr;
+
+ addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
+ if (!addr)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&addr->entry);
+ addr->address = address;
+ addr->length = length;
+
+ list_add_tail(&addr->entry, &slave->addresses);
+ return 0;
+}
+
+/**
+ * fsl_dma_slave_free - free a struct fsl_dma_slave
+ * @slave: the struct fsl_dma_slave to free
+ *
+ * Free a struct fsl_dma_slave and all associated address/length pairs
+ */
+static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave)
+{
+ struct fsl_dma_hw_addr *addr, *tmp;
+
+ if (slave) {
+ list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) {
+ list_del(&addr->entry);
+ kfree(addr);
+ }
+
+ kfree(slave);
+ }
+}
+
+/**
+ * fsl_dma_slave_alloc - allocate a struct fsl_dma_slave
+ * @gfp: the flags to pass to kmalloc when allocating this structure
+ *
+ * Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new
+ * struct fsl_dma_slave on success, or NULL on failure.
+ */
+static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp)
+{
+ struct fsl_dma_slave *slave;
+
+ slave = kzalloc(sizeof(*slave), gfp);
+ if (!slave)
+ return NULL;
+
+ INIT_LIST_HEAD(&slave->addresses);
+ return slave;
+}
+
+#endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index c0d3b8af931..d9ea8d39c34 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -146,7 +146,7 @@ extern void smp_generic_take_timebase(void);
extern struct smp_ops_t *smp_ops;
extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
/* Definitions relative to the secondary CPU spin loop
* and entry point. Not all of them exist on both 32 and
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 394edcbcce7..22f738d12ad 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -17,11 +17,6 @@ static inline int cpu_to_node(int cpu)
#define parent_node(node) (node)
-static inline cpumask_t node_to_cpumask(int node)
-{
- return numa_cpumask_lookup_table[node];
-}
-
#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
int of_node_to_nid(struct device_node *device);
@@ -36,11 +31,6 @@ static inline int pcibus_to_node(struct pci_bus *bus)
}
#endif
-#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \
- CPU_MASK_ALL : \
- node_to_cpumask(pcibus_to_node(bus)) \
- )
-
#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
cpu_all_mask : \
cpumask_of_node(pcibus_to_node(bus)))
@@ -104,8 +94,6 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
#ifdef CONFIG_PPC64
#include <asm/smp.h>
-#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
-#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu))
#define topology_core_id(cpu) (cpu_to_core_id(cpu))
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
index ffc4253fef5..2375b7eb1c7 100644
--- a/arch/powerpc/kernel/init_task.c
+++ b/arch/powerpc/kernel/init_task.c
@@ -16,9 +16,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 49e705fcee6..040bd1de8d9 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -13,6 +13,7 @@
#include <linux/kexec.h>
#include <linux/smp.h>
#include <linux/thread_info.h>
+#include <linux/init_task.h>
#include <linux/errno.h>
#include <asm/page.h>
@@ -249,8 +250,8 @@ static void kexec_prepare_cpus(void)
* We could use a smaller stack if we don't care about anything using
* current, but that audit has not been performed.
*/
-static union thread_union kexec_stack
- __attribute__((__section__(".data.init_task"))) = { };
+static union thread_union kexec_stack __init_task_data =
+ { };
/* Our assembly helper, in kexec_stub.S */
extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 1d5570a1e45..4271f7a655a 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -24,7 +24,6 @@
#include <linux/seq_file.h>
#include <linux/ioport.h>
#include <linux/console.h>
-#include <linux/utsname.h>
#include <linux/screen_info.h>
#include <linux/root_dev.h>
#include <linux/notifier.h>
@@ -432,9 +431,9 @@ void __init smp_setup_cpu_maps(void)
for (j = 0; j < nthreads && cpu < NR_CPUS; j++) {
DBG(" thread %d -> cpu %d (hard id %d)\n",
j, cpu, intserv[j]);
- cpu_set(cpu, cpu_present_map);
+ set_cpu_present(cpu, true);
set_hard_smp_processor_id(cpu, intserv[j]);
- cpu_set(cpu, cpu_possible_map);
+ set_cpu_possible(cpu, true);
cpu++;
}
}
@@ -480,7 +479,7 @@ void __init smp_setup_cpu_maps(void)
maxcpus);
for (cpu = 0; cpu < maxcpus; cpu++)
- cpu_set(cpu, cpu_possible_map);
+ set_cpu_possible(cpu, true);
out:
of_node_put(dn);
}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index d387b3937cc..9b86a74d281 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -189,11 +189,11 @@ void arch_send_call_function_single_ipi(int cpu)
smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
}
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
unsigned int cpu;
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu(cpu, mask)
smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
@@ -287,7 +287,7 @@ void __devinit smp_prepare_boot_cpu(void)
{
BUG_ON(smp_processor_id() != boot_cpuid);
- cpu_set(boot_cpuid, cpu_online_map);
+ set_cpu_online(boot_cpuid, true);
cpu_set(boot_cpuid, per_cpu(cpu_sibling_map, boot_cpuid));
cpu_set(boot_cpuid, per_cpu(cpu_core_map, boot_cpuid));
#ifdef CONFIG_PPC64
@@ -307,7 +307,7 @@ int generic_cpu_disable(void)
if (cpu == boot_cpuid)
return -EBUSY;
- cpu_clear(cpu, cpu_online_map);
+ set_cpu_online(cpu, false);
#ifdef CONFIG_PPC64
vdso_data->processorCount--;
fixup_irqs(cpu_online_map);
@@ -361,7 +361,7 @@ void generic_mach_cpu_die(void)
smp_wmb();
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
cpu_relax();
- cpu_set(cpu, cpu_online_map);
+ set_cpu_online(cpu, true);
local_irq_enable();
}
#endif
@@ -508,7 +508,7 @@ int __devinit start_secondary(void *unused)
ipi_call_lock();
notify_cpu_starting(cpu);
- cpu_set(cpu, cpu_online_map);
+ set_cpu_online(cpu, true);
/* Update sibling maps */
base = cpu_first_thread_in_core(cpu);
for (i = 0; i < threads_per_core; i++) {
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 1cc5e9e5da9..b97c2d67f4a 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -22,7 +22,6 @@
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/times.h>
-#include <linux/utsname.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/sem.h>
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index a0abce251d0..3faaf29bdb2 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -1,3 +1,4 @@
+
/*
* Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
@@ -74,7 +75,7 @@ static int vdso_ready;
static union {
struct vdso_data data;
u8 page[PAGE_SIZE];
-} vdso_data_store __attribute__((__section__(".data.page_aligned")));
+} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;
/* Format of the patch table */
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index b54b8168813..51ead52141b 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -16,7 +16,7 @@ GCOV_PROFILE := n
EXTRA_CFLAGS := -shared -fno-common -fno-builtin
EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
EXTRA_AFLAGS := -D__VDSO32__ -s
obj-y += vdso32_wrapper.o
diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
index 556f0caa5d8..6e8f507ed32 100644
--- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
+++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
@@ -1,7 +1,8 @@
#include <linux/init.h>
+#include <linux/linkage.h>
#include <asm/page.h>
- .section ".data.page_aligned"
+ __PAGE_ALIGNED_DATA
.globl vdso32_start, vdso32_end
.balign PAGE_SIZE
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index dd0c8e93677..79da65d44a2 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -11,7 +11,7 @@ GCOV_PROFILE := n
EXTRA_CFLAGS := -shared -fno-common -fno-builtin
EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
EXTRA_AFLAGS := -D__VDSO64__ -s
obj-y += vdso64_wrapper.o
diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
index 0529cb9e3b9..b8553d62b79 100644
--- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
+++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
@@ -1,7 +1,8 @@
#include <linux/init.h>
+#include <linux/linkage.h>
#include <asm/page.h>
- .section ".data.page_aligned"
+ __PAGE_ALIGNED_DATA
.globl vdso64_start, vdso64_end
.balign PAGE_SIZE
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 937a38e7317..b40c22d697f 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -320,7 +320,7 @@ static int __init smp_psurge_probe(void)
if (ncpus > NR_CPUS)
ncpus = NR_CPUS;
for (i = 1; i < ncpus ; ++i)
- cpu_set(i, cpu_present_map);
+ set_cpu_present(i, true);
if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352);
@@ -867,7 +867,7 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr)
int smp_core99_cpu_disable(void)
{
- cpu_clear(smp_processor_id(), cpu_online_map);
+ set_cpu_online(smp_processor_id(), false);
/* XXX reset cpu affinity here */
mpic_cpu_set_priority(0xf);
@@ -952,7 +952,7 @@ void __init pmac_setup_smp(void)
int cpu;
for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu)
- cpu_set(cpu, cpu_possible_map);
+ set_cpu_possible(cpu, true);
smp_ops = &psurge_smp_ops;
}
#endif /* CONFIG_PPC32 */
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index a20ead87153..ebff6d9a4e3 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -94,7 +94,7 @@ static int pseries_cpu_disable(void)
{
int cpu = smp_processor_id();
- cpu_clear(cpu, cpu_online_map);
+ set_cpu_online(cpu, false);
vdso_data->processorCount--;
/*fix boot_cpuid here*/
@@ -185,7 +185,7 @@ static int pseries_add_processor(struct device_node *np)
for_each_cpu_mask(cpu, tmp) {
BUG_ON(cpu_isset(cpu, cpu_present_map));
- cpu_set(cpu, cpu_present_map);
+ set_cpu_present(cpu, true);
set_hard_smp_processor_id(cpu, *intserv++);
}
err = 0;
@@ -217,7 +217,7 @@ static void pseries_remove_processor(struct device_node *np)
if (get_hard_smp_processor_id(cpu) != intserv[i])
continue;
BUG_ON(cpu_online(cpu));
- cpu_clear(cpu, cpu_present_map);
+ set_cpu_present(cpu, false);
set_hard_smp_processor_id(cpu, -1);
break;
}
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 264528e4f58..b55fd7ed1c3 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -50,10 +50,9 @@ static struct platform_device *appldata_pdev;
* /proc entries (sysctl)
*/
static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
-static int appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
+static int appldata_timer_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
static int appldata_interval_handler(ctl_table *ctl, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos);
@@ -247,7 +246,7 @@ __appldata_vtimer_setup(int cmd)
* Start/Stop timer, show status of timer (0 = not active, 1 = active)
*/
static int
-appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_timer_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int len;
@@ -289,7 +288,7 @@ out:
* current timer interval.
*/
static int
-appldata_interval_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_interval_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int len, interval;
@@ -335,7 +334,7 @@ out:
* monitoring (0 = not in process, 1 = in process)
*/
static int
-appldata_generic_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_generic_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct appldata_ops *ops = NULL, *tmp_ops;
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index d4026f62cb0..aed3069699b 100644
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -21,8 +21,8 @@
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install - same as make zlilo
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index c991fe6473c..a868b272c25 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -62,7 +62,7 @@ extern struct mutex smp_cpu_state_mutex;
extern int smp_cpu_polarization[];
extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#endif
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 5e0ad618dc4..6e7211abd95 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -9,7 +9,6 @@ const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
extern cpumask_t cpu_core_map[NR_CPUS];
-#define topology_core_siblings(cpu) (cpu_core_map[cpu])
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
int topology_set_cpu_management(int fc);
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 5519cb74510..0debcec23a3 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -24,7 +24,6 @@
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/times.h>
-#include <linux/utsname.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/sem.h>
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 4c512561687..20f282c911c 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -881,11 +881,11 @@ static int debug_active=1;
* if debug_active is already off
*/
static int
-s390dbf_procactive(ctl_table *table, int write, struct file *filp,
+s390dbf_procactive(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
- return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ return proc_dointvec(table, write, buffer, lenp, ppos);
else
return 0;
}
diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c
index fe787f9e5f3..4d1c9fb0b54 100644
--- a/arch/s390/kernel/init_task.c
+++ b/arch/s390/kernel/init_task.c
@@ -25,9 +25,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 59fe6ecc6ed..5417eb57271 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -27,7 +27,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/notifier.h>
-#include <linux/utsname.h>
#include <linux/tick.h>
#include <linux/elfcore.h>
#include <linux/kernel_stat.h>
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index b4b6396e6cf..c932caa5e85 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -147,11 +147,11 @@ static void smp_ext_bitcall(int cpu, ec_bit_sig sig)
udelay(10);
}
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
int cpu;
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu(cpu, mask)
smp_ext_bitcall(cpu, ec_call_function);
}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 45e1708b70f..45a3e9a7ae2 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -75,7 +75,7 @@ __setup("vdso=", vdso_setup);
static union {
struct vdso_data data;
u8 page[PAGE_SIZE];
-} vdso_data_store __attribute__((__section__(".data.page_aligned")));
+} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;
/*
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index ca78ad60ba2..d13e8755a8c 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_31 += -m31 -s
KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
index 61639a89e70..ae42f8ce350 100644
--- a/arch/s390/kernel/vdso32/vdso32_wrapper.S
+++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S
@@ -1,7 +1,8 @@
#include <linux/init.h>
+#include <linux/linkage.h>
#include <asm/page.h>
- .section ".data.page_aligned"
+ __PAGE_ALIGNED_DATA
.globl vdso32_start, vdso32_end
.balign PAGE_SIZE
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 6fc8e829258..449352dda9c 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_64 += -m64 -s
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S
index d8e2ac14d56..c245842b516 100644
--- a/arch/s390/kernel/vdso64/vdso64_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S
@@ -1,7 +1,8 @@
#include <linux/init.h>
+#include <linux/linkage.h>
#include <asm/page.h>
- .section ".data.page_aligned"
+ __PAGE_ALIGNED_DATA
.globl vdso64_start, vdso64_end
.balign PAGE_SIZE
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 413c240cbca..b201135cc18 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -262,7 +262,7 @@ cmm_skip_blanks(char *cp, char **endp)
static struct ctl_table cmm_table[];
static int
-cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
+cmm_pages_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char buf[16], *p;
@@ -303,7 +303,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
}
static int
-cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
+cmm_timeout_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char buf[64], *p;
diff --git a/arch/score/kernel/init_task.c b/arch/score/kernel/init_task.c
index ff952f6c63f..baa03ee217d 100644
--- a/arch/score/kernel/init_task.c
+++ b/arch/score/kernel/init_task.c
@@ -34,9 +34,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"), __aligned__(THREAD_SIZE))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/sh/boot/compressed/install.sh b/arch/sh/boot/compressed/install.sh
index 90589f0fec1..f9f41818b17 100644
--- a/arch/sh/boot/compressed/install.sh
+++ b/arch/sh/boot/compressed/install.sh
@@ -23,8 +23,8 @@
# User may have a custom install script
-if [ -x /sbin/installkernel ]; then
- exec /sbin/installkernel "$@"
+if [ -x /sbin/${INSTALLKERNEL} ]; then
+ exec /sbin/${INSTALLKERNEL} "$@"
fi
if [ "$2" = "zImage" ]; then
diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig
index b91fa8dbf04..4d58eb0973d 100644
--- a/arch/sh/drivers/dma/Kconfig
+++ b/arch/sh/drivers/dma/Kconfig
@@ -1,12 +1,9 @@
menu "DMA support"
-config SH_DMA_API
- bool
config SH_DMA
bool "SuperH on-chip DMA controller (DMAC) support"
depends on CPU_SH3 || CPU_SH4
- select SH_DMA_API
default n
config SH_DMA_IRQ_MULTI
@@ -19,6 +16,15 @@ config SH_DMA_IRQ_MULTI
CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \
CPU_SUBTYPE_SH7760
+config SH_DMA_API
+ depends on SH_DMA
+ bool "SuperH DMA API support"
+ default n
+ help
+ SH_DMA_API enables the SuperH-specific DMA API.
+ If you want to use DMA_ENGINE, you must not enable this option;
+ enable DMA_ENGINE and SH_DMAE instead.
+
config NR_ONCHIP_DMA_CHANNELS
int
depends on SH_DMA
diff --git a/arch/sh/drivers/dma/Makefile b/arch/sh/drivers/dma/Makefile
index c6068137b46..d88c9484762 100644
--- a/arch/sh/drivers/dma/Makefile
+++ b/arch/sh/drivers/dma/Makefile
@@ -2,8 +2,7 @@
# Makefile for the SuperH DMA specific kernel interface routines under Linux.
#
-obj-$(CONFIG_SH_DMA_API) += dma-api.o dma-sysfs.o
-obj-$(CONFIG_SH_DMA) += dma-sh.o
+obj-$(CONFIG_SH_DMA_API) += dma-sh.o dma-api.o dma-sysfs.o
obj-$(CONFIG_PVR2_DMA) += dma-pvr2.o
obj-$(CONFIG_G2_DMA) += dma-g2.o
obj-$(CONFIG_SH_DMABRG) += dmabrg.o
diff --git a/arch/sh/include/asm/dma-sh.h b/arch/sh/include/asm/dma-sh.h
index 68a5f4cb034..78eed3e0bdf 100644
--- a/arch/sh/include/asm/dma-sh.h
+++ b/arch/sh/include/asm/dma-sh.h
@@ -116,4 +116,17 @@ static u32 dma_base_addr[] __maybe_unused = {
#define CHCR 0x0C
#define DMAOR 0x40
+/*
+ * for dma engine
+ *
+ * SuperH DMA mode
+ */
+#define SHDMA_MIX_IRQ (1 << 1)
+#define SHDMA_DMAOR1 (1 << 2)
+#define SHDMA_DMAE1 (1 << 3)
+
+struct sh_dmae_pdata {
+ unsigned int mode;
+};
+
#endif /* __DMA_SH_H */
diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index ca64f43abe6..53ef26ced75 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -44,7 +44,6 @@ void plat_send_ipi(unsigned int cpu, unsigned int message);
void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
#else
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index f8c40cc6505..65e7bd2f224 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -31,7 +31,6 @@
#define cpu_to_node(cpu) ((void)(cpu),0)
#define parent_node(node) ((void)(node),0)
-#define node_to_cpumask(node) ((void)node, cpu_online_map)
#define cpumask_of_node(node) ((void)node, cpu_online_mask)
#define pcibus_to_node(bus) ((void)(bus), -1)
diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c
index 1719957c0a6..11f2ea556a6 100644
--- a/arch/sh/kernel/init_task.c
+++ b/arch/sh/kernel/init_task.c
@@ -17,9 +17,8 @@ struct pt_regs fake_swapper_regs;
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 60f8af4497c..7cb933ba495 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -165,11 +165,9 @@ asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs)
}
#ifdef CONFIG_IRQSTACKS
-static char softirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__section__(".bss.page_aligned")));
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__section__(".bss.page_aligned")));
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
/*
* allocate per-cpu stacks for hardirq and for softirq processing
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index 63ba12836ea..eb68bfdd86e 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -9,7 +9,6 @@
#include <linux/syscalls.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/ipc.h>
diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c
index 91fb8445a5a..287235768bc 100644
--- a/arch/sh/kernel/sys_sh64.c
+++ b/arch/sh/kernel/sys_sh64.c
@@ -23,7 +23,6 @@
#include <linux/stat.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/syscalls.h>
#include <linux/ipc.h>
#include <asm/uaccess.h>
diff --git a/arch/sh/kernel/vsyscall/Makefile b/arch/sh/kernel/vsyscall/Makefile
index 4bbce1cfa35..8f0ea5fc835 100644
--- a/arch/sh/kernel/vsyscall/Makefile
+++ b/arch/sh/kernel/vsyscall/Makefile
@@ -15,7 +15,7 @@ quiet_cmd_syscall = SYSCALL $@
export CPPFLAGS_vsyscall.lds += -P -C -Ush
vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
SYSCFLAGS_vsyscall-trapa.so = $(vsyscall-flags)
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 467221dd570..dfe272d1446 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -31,7 +31,6 @@ export BITS := 32
#KBUILD_CFLAGS += -g -pipe -fcall-used-g5 -fcall-used-g7
KBUILD_CFLAGS += -m32 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
KBUILD_AFLAGS += -m32
-CPPFLAGS_vmlinux.lds += -m32
#LDFLAGS_vmlinux = -N -Ttext 0xf0004000
# Since 2.5.40, the first stage is left not btfix-ed.
@@ -45,9 +44,6 @@ else
CHECKFLAGS += -D__sparc__ -D__sparc_v9__ -D__arch64__ -m64
-# Undefine sparc when processing vmlinux.lds - it is used
-# And teach CPP we are doing 64 bit builds (for this case)
-CPPFLAGS_vmlinux.lds += -m64 -Usparc
LDFLAGS := -m elf64_sparc
export BITS := 64
diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
index becb6bf353a..f49e11cd4de 100644
--- a/arch/sparc/include/asm/smp_64.h
+++ b/arch/sparc/include/asm/smp_64.h
@@ -36,7 +36,6 @@ extern int sparc64_multi_core;
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
/*
* General functions that each host system must provide.
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index 26cd25c0839..600a79035fa 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -12,22 +12,8 @@ static inline int cpu_to_node(int cpu)
#define parent_node(node) (node)
-static inline cpumask_t node_to_cpumask(int node)
-{
- return numa_cpumask_lookup_table[node];
-}
#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
-/*
- * Returns a pointer to the cpumask of CPUs on Node 'node'.
- * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
- */
-#define node_to_cpumask_ptr(v, node) \
- cpumask_t *v = &(numa_cpumask_lookup_table[node])
-
-#define node_to_cpumask_ptr_next(v, node) \
- v = &(numa_cpumask_lookup_table[node])
-
struct pci_bus;
#ifdef CONFIG_PCI
extern int pcibus_to_node(struct pci_bus *pbus);
@@ -71,8 +57,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
#ifdef CONFIG_SMP
#define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id)
#define topology_core_id(cpu) (cpu_data(cpu).core_id)
-#define topology_core_siblings(cpu) (cpu_core_map[cpu])
-#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#define mc_capable() (sparc64_multi_core)
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 3a048fad7ee..5b47fab9966 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -7,7 +7,11 @@ ccflags-y := -Werror
extra-y := head_$(BITS).o
extra-y += init_task.o
-extra-y += vmlinux.lds
+
+# Undefine sparc when processing vmlinux.lds - it is used
+# And teach CPP we are doing $(BITS) builds (for this case)
+CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS)
+extra-y += vmlinux.lds
obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o
obj-$(CONFIG_SPARC32) += etrap_32.o
diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c
index 28125c5b3d3..5fe3d65581f 100644
--- a/arch/sparc/kernel/init_task.c
+++ b/arch/sparc/kernel/init_task.c
@@ -18,6 +18,5 @@ EXPORT_SYMBOL(init_task);
* If this is not aligned on a 8k boundry, then you should change code
* in etrap.S which assumes it.
*/
-union thread_union init_thread_union
- __attribute__((section (".data.init_task")))
- = { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index f5000a460c0..04e28b2671c 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -16,7 +16,6 @@
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/times.h>
-#include <linux/utsname.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/sem.h>
diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h
index 15c2d752b2b..a63c5d2d984 100644
--- a/arch/sparc/kernel/systbls.h
+++ b/arch/sparc/kernel/systbls.h
@@ -3,10 +3,11 @@
#include <linux/kernel.h>
#include <linux/types.h>
-#include <linux/utsname.h>
#include <asm/utrap.h>
#include <asm/signal.h>
+struct new_utsname;
+
extern asmlinkage unsigned long sys_getpagesize(void);
extern asmlinkage unsigned long sparc_brk(unsigned long brk);
extern asmlinkage long sparc_pipe(struct pt_regs *regs);
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 0728def3223..fc633dbacf8 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -96,11 +96,10 @@ CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,)
$(call cc-option, -fno-stack-protector,) \
$(call cc-option, -fno-stack-protector-all,)
-CONFIG_KERNEL_STACK_ORDER ?= 2
-STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] )
-
-CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \
- -DELF_FORMAT="$(ELF_FORMAT)" -DKERNEL_STACK_SIZE=$(STACK_SIZE)
+# Options used by linker script
+export LDS_START := $(START)
+export LDS_ELF_ARCH := $(ELF_ARCH)
+export LDS_ELF_FORMAT := $(ELF_FORMAT)
# The wrappers will select whether using "malloc" or the kernel allocator.
LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index 54f42e8b010..34d813011b7 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
unsigned cpu = smp_processor_id();
if(prev != next){
- cpu_clear(cpu, prev->cpu_vm_mask);
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ cpumask_set_cpu(cpu, mm_cpumask(next));
if(next != &init_mm)
__switch_mm(&next->context.id);
}
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 388ec0a3ea9..1119233597a 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -3,6 +3,9 @@
# Licensed under the GPL
#
+CPPFLAGS_vmlinux.lds := -U$(SUBARCH) -DSTART=$(LDS_START) \
+ -DELF_ARCH=$(LDS_ELF_ARCH) \
+ -DELF_FORMAT=$(LDS_ELF_FORMAT)
extra-y := vmlinux.lds
clean-files :=
diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
index b25121b537d..8aa77b61a5f 100644
--- a/arch/um/kernel/init_task.c
+++ b/arch/um/kernel/init_task.c
@@ -30,9 +30,8 @@ EXPORT_SYMBOL(init_task);
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
union thread_union cpu0_irqstack
__attribute__((__section__(".data.init_irqstack"))) =
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 98351c78bc8..106bf27e2a9 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -111,7 +111,7 @@ void smp_prepare_cpus(unsigned int maxcpus)
int i;
for (i = 0; i < ncpus; ++i)
- cpu_set(i, cpu_possible_map);
+ set_cpu_possible(i, true);
cpu_clear(me, cpu_online_map);
cpu_set(me, cpu_online_map);
diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S
index f8aeb448aab..16e49bfa2b4 100644
--- a/arch/um/kernel/vmlinux.lds.S
+++ b/arch/um/kernel/vmlinux.lds.S
@@ -1,3 +1,6 @@
+
+KERNEL_STACK_SIZE = 4096 * (1 << CONFIG_KERNEL_STACK_ORDER);
+
#ifdef CONFIG_LD_SCRIPT_STATIC
#include "uml.lds.S"
#else
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 7983c420eaf..a012ee8ef80 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -179,8 +179,8 @@ archclean:
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
echo ' install - Install kernel using'
- echo ' (your) ~/bin/installkernel or'
- echo ' (distribution) /sbin/installkernel or'
+ echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
echo ' install to $$(INSTALL_PATH) and run lilo'
echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh
index 8d60ee15dfd..d13ec1c3864 100644
--- a/arch/x86/boot/install.sh
+++ b/arch/x86/boot/install.sh
@@ -33,8 +33,8 @@ verify "$3"
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install - same as make zlilo
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h
index 5d367caa0e3..549860d3be8 100644
--- a/arch/x86/include/asm/cache.h
+++ b/arch/x86/include/asm/cache.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_CACHE_H
#define _ASM_X86_CACHE_H
+#include <linux/linkage.h>
+
/* L1 cache line size */
#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
@@ -13,7 +15,7 @@
#ifdef CONFIG_SMP
#define __cacheline_aligned_in_smp \
__attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \
- __attribute__((__section__(".data.page_aligned")))
+ __page_aligned_data
#endif
#endif
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index f923203dc39..4a2d4e0c18d 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -37,12 +37,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
- cpu_clear(cpu, prev->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
#ifdef CONFIG_SMP
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
#endif
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
/* Re-load page tables */
load_cr3(next->pgd);
@@ -58,7 +58,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+ if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index e63cf7d441e..139d4c1a33a 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog;
#define NMI_INVALID 3
struct ctl_table;
-struct file;
-extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
+extern int proc_nmi_enabled(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
extern int unknown_nmi_panic;
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index f76a162c082..ada8c201d51 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -143,7 +143,11 @@ static inline int __pcibus_to_node(const struct pci_bus *bus)
static inline const struct cpumask *
cpumask_of_pcibus(const struct pci_bus *bus)
{
- return cpumask_of_node(__pcibus_to_node(bus));
+ int node;
+
+ node = __pcibus_to_node(bus);
+ return (node == -1) ? cpu_online_mask :
+ cpumask_of_node(node);
}
#endif
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 6a84ed166ae..1e796782cd7 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -121,7 +121,6 @@ static inline void arch_send_call_function_single_ipi(int cpu)
smp_ops.send_call_func_single_ipi(cpu);
}
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
smp_ops.send_call_func_ipi(mask);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 64970b9885f..dc69f28489f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -227,17 +227,14 @@ static struct irq_cfg *get_one_free_irq_cfg(int node)
cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
if (cfg) {
- if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+ if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
kfree(cfg);
cfg = NULL;
- } else if (!alloc_cpumask_var_node(&cfg->old_domain,
+ } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
GFP_ATOMIC, node)) {
free_cpumask_var(cfg->domain);
kfree(cfg);
cfg = NULL;
- } else {
- cpumask_clear(cfg->domain);
- cpumask_clear(cfg->old_domain);
}
}
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index cb66a22d98a..7ff61d6a188 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
/*
* proc handler for /proc/sys/kernel/nmi
*/
-int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+int proc_nmi_enabled(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
int old_state;
nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
old_state = nmi_watchdog_enabled;
- proc_dointvec(table, write, file, buffer, length, ppos);
+ proc_dointvec(table, write, buffer, length, ppos);
if (!!old_state == !!nmi_watchdog_enabled)
return 0;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index bca5fba91c9..f7dd2a7c3bf 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -5,7 +5,6 @@
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
-#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 54b0a327676..a071e6be177 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -5,7 +5,6 @@
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
-#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b766e8c7252..218aad7ee76 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -608,7 +608,7 @@ ENTRY(initial_code)
/*
* BSS section
*/
-.section ".bss.page_aligned","wa"
+__PAGE_ALIGNED_BSS
.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
swapper_pg_pmd:
@@ -626,7 +626,7 @@ ENTRY(empty_zero_page)
* This starts the data section.
*/
#ifdef CONFIG_X86_PAE
-.section ".data.page_aligned","wa"
+__PAGE_ALIGNED_DATA
/* Page-aligned for the benefit of paravirt? */
.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index fa54f78e2a0..d0bc0a13a43 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -418,7 +418,7 @@ ENTRY(phys_base)
ENTRY(idt_table)
.skip IDT_ENTRIES * 16
- .section .bss.page_aligned, "aw", @nobits
+ __PAGE_ALIGNED_BSS
.align PAGE_SIZE
ENTRY(empty_zero_page)
.skip PAGE_SIZE
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 270ff83efc1..3a54dcb9cd0 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 71f1d99a635..ec6ef60cbd1 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -67,8 +67,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
#ifdef CONFIG_SMP
preempt_disable();
load_LDT(pc);
- if (!cpus_equal(current->mm->cpu_vm_mask,
- cpumask_of_cpu(smp_processor_id())))
+ if (!cpumask_equal(mm_cpumask(current->mm),
+ cpumask_of(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 847ab416031..5284cd2b577 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -555,10 +555,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
void __init init_c1e_mask(void)
{
/* If we're using c1e_idle, we need to allocate c1e_mask. */
- if (pm_idle == c1e_idle) {
- alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
- cpumask_clear(c1e_mask);
- }
+ if (pm_idle == c1e_idle)
+ zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
}
static int __init idle_setup(char *str)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 09c5e077dff..565ebc65920 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1059,12 +1059,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
#endif
current_thread_info()->cpu = 0; /* needed? */
for_each_possible_cpu(i) {
- alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
- alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
- cpumask_clear(per_cpu(cpu_core_map, i));
- cpumask_clear(per_cpu(cpu_sibling_map, i));
- cpumask_clear(cpu_data(i).llc_shared_map);
+ zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
}
set_cpu_sibling_map(0);
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index e293ac56c72..dcb00d27851 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -93,7 +93,6 @@ static struct irqaction irq0 = {
void __init setup_default_timer_irq(void)
{
- irq0.mask = cpumask_of_cpu(0);
setup_irq(0, &irq0);
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9346e102338..a665c71352b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -14,7 +14,6 @@
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
-#include <linux/utsname.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index cf53a78e2dc..8cb4974ff59 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void)
}
#ifdef CONFIG_SYSCTL
-
-static int
-vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-}
-
static ctl_table kernel_table2[] = {
{ .procname = "vsyscall64",
.data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = vsyscall_sysctl_change },
+ .proc_handler = proc_dointvec },
{}
};
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 82728f2c6d5..f4cee9028cf 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
info.si_errno = 0;
info.si_code = si_code;
info.si_addr = (void __user *)address;
+ info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
force_sig_info(si_signo, &info, tsk);
}
@@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code,
}
static void
-do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+ unsigned int fault)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
+ int code = BUS_ADRERR;
up_read(&mm->mmap_sem);
@@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+#ifdef CONFIG_MEMORY_FAILURE
+ if (fault & VM_FAULT_HWPOISON) {
+ printk(KERN_ERR
+ "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+ tsk->comm, tsk->pid, address);
+ code = BUS_MCEERR_AR;
+ }
+#endif
+ force_sig_info_fault(SIGBUS, code, address, tsk);
}
static noinline void
@@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
if (fault & VM_FAULT_OOM) {
out_of_memory(regs, error_code, address);
} else {
- if (fault & VM_FAULT_SIGBUS)
- do_sigbus(regs, error_code, address);
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+ do_sigbus(regs, error_code, address, fault);
else
BUG();
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 24952fdc7e4..dd38bfbefd1 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -144,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size)
mb();
}
+EXPORT_SYMBOL_GPL(clflush_cache_range);
static void __cpa_flush_all(void *arg)
{
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c814e144a3f..36fe08eeb5c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -59,7 +59,8 @@ void leave_mm(int cpu)
{
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
BUG();
- cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
+ cpumask_clear_cpu(cpu,
+ mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
@@ -234,8 +235,8 @@ void flush_tlb_current_task(void)
preempt_disable();
local_flush_tlb();
- if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
+ if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
preempt_enable();
}
@@ -249,8 +250,8 @@ void flush_tlb_mm(struct mm_struct *mm)
else
leave_mm(smp_processor_id());
}
- if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
+ if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
preempt_enable();
}
@@ -268,8 +269,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
leave_mm(smp_processor_id());
}
- if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(&mm->cpu_vm_mask, mm, va);
+ if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(mm_cpumask(mm), mm, va);
preempt_enable();
}
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 5db96d4304d..1331fcf2614 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -646,7 +646,7 @@ int get_mp_bus_to_node(int busnum)
#else /* CONFIG_X86_32 */
-static unsigned char mp_bus_to_node[BUS_NR] = {
+static int mp_bus_to_node[BUS_NR] = {
[0 ... BUS_NR - 1] = -1
};
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 88112b49f02..6b4ffedb93c 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -122,7 +122,7 @@ quiet_cmd_vdso = VDSO $@
$(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
-VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
GCOV_PROFILE := n
#
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 093dd59b538..3bf7b1d250c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1165,14 +1165,14 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
/* Get the "official" set of cpus referring to our pagetable. */
if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
for_each_online_cpu(cpu) {
- if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
+ if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
&& per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
continue;
smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
}
return;
}
- cpumask_copy(mask, &mm->cpu_vm_mask);
+ cpumask_copy(mask, mm_cpumask(mm));
/* It's possible that a vcpu may have a stale reference to our
cr3, because its in lazy mode, and it hasn't yet flushed
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index fe3186de6a3..6f56d95f2c1 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -27,7 +27,8 @@ sed-y = -e 's/(\(\.[a-z]*it\|\.ref\|\)\.text)/(\1.literal \1.text)/g' \
-e 's/(\(\.text\.[a-z]*\))/(\1.literal \1)/g'
quiet_cmd__cpp_lds_S = LDS $@
- cmd__cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ $< | sed $(sed-y) >$@
+ cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ $< \
+ | sed $(sed-y) >$@
$(obj)/vmlinux.lds: $(src)/vmlinux.lds.S FORCE
$(call if_changed_dep,_cpp_lds_S)
diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S
index d9ddc1ba761..d215adcfd4e 100644
--- a/arch/xtensa/kernel/head.S
+++ b/arch/xtensa/kernel/head.S
@@ -235,7 +235,7 @@ should_never_return:
* BSS section
*/
-.section ".bss.page_aligned", "w"
+__PAGE_ALIGNED_BSS
#ifdef CONFIG_MMU
ENTRY(swapper_pg_dir)
.fill PAGE_SIZE, 1, 0
diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c
index c4302f0e4ba..cd122fb7e48 100644
--- a/arch/xtensa/kernel/init_task.c
+++ b/arch/xtensa/kernel/init_task.c
@@ -23,9 +23,8 @@
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
-{ INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
struct task_struct init_task = INIT_TASK(init_task);
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
index d8fb3914598..e5aeb2b79e6 100644
--- a/crypto/async_tx/Kconfig
+++ b/crypto/async_tx/Kconfig
@@ -14,3 +14,12 @@ config ASYNC_MEMSET
tristate
select ASYNC_CORE
+config ASYNC_PQ
+ tristate
+ select ASYNC_CORE
+
+config ASYNC_RAID6_RECOV
+ tristate
+ select ASYNC_CORE
+ select ASYNC_PQ
+
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile
index 27baa7d52fb..d1e0e6f72bc 100644
--- a/crypto/async_tx/Makefile
+++ b/crypto/async_tx/Makefile
@@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o
obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
obj-$(CONFIG_ASYNC_XOR) += async_xor.o
+obj-$(CONFIG_ASYNC_PQ) += async_pq.o
+obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o
+obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index ddccfb01c41..0ec1fb69d4e 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -33,28 +33,31 @@
* async_memcpy - attempt to copy memory with a dma engine.
* @dest: destination page
* @src: src page
- * @offset: offset in pages to start transaction
+ * @dest_offset: offset into 'dest' to start transaction
+ * @src_offset: offset into 'src' to start transaction
* @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
- * @depend_tx: memcpy depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
*/
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
- unsigned int src_offset, size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+ unsigned int src_offset, size_t len,
+ struct async_submit_ctl *submit)
{
- struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY,
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
&dest, 1, &src, 1, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
- if (device) {
+ if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
dma_addr_t dma_dest, dma_src;
- unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+ unsigned long dma_prep_flags = 0;
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
DMA_FROM_DEVICE);
@@ -67,13 +70,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
if (tx) {
pr_debug("%s: (async) len: %zu\n", __func__, len);
- async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ async_tx_submit(chan, tx, submit);
} else {
void *dest_buf, *src_buf;
pr_debug("%s: (sync) len: %zu\n", __func__, len);
/* wait for any prerequisite operations */
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
src_buf = kmap_atomic(src, KM_USER1) + src_offset;
@@ -83,26 +86,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
kunmap_atomic(dest_buf, KM_USER0);
kunmap_atomic(src_buf, KM_USER1);
- async_tx_sync_epilog(cb_fn, cb_param);
+ async_tx_sync_epilog(submit);
}
return tx;
}
EXPORT_SYMBOL_GPL(async_memcpy);
-static int __init async_memcpy_init(void)
-{
- return 0;
-}
-
-static void __exit async_memcpy_exit(void)
-{
- do { } while (0);
-}
-
-module_init(async_memcpy_init);
-module_exit(async_memcpy_exit);
-
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous memcpy api");
MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index 5b5eb99bb24..58e4a8752ae 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -35,26 +35,26 @@
* @val: fill value
* @offset: offset in pages to start transaction
* @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: memset depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ *
+ * honored flags: ASYNC_TX_ACK
*/
struct dma_async_tx_descriptor *
-async_memset(struct page *dest, int val, unsigned int offset,
- size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+async_memset(struct page *dest, int val, unsigned int offset, size_t len,
+ struct async_submit_ctl *submit)
{
- struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET,
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
&dest, 1, NULL, 0, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
- if (device) {
+ if (device && is_dma_fill_aligned(device, offset, 0, len)) {
dma_addr_t dma_dest;
- unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+ unsigned long dma_prep_flags = 0;
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
dma_dest = dma_map_page(device->dev, dest, offset, len,
DMA_FROM_DEVICE);
@@ -64,38 +64,25 @@ async_memset(struct page *dest, int val, unsigned int offset,
if (tx) {
pr_debug("%s: (async) len: %zu\n", __func__, len);
- async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ async_tx_submit(chan, tx, submit);
} else { /* run the memset synchronously */
void *dest_buf;
pr_debug("%s: (sync) len: %zu\n", __func__, len);
- dest_buf = (void *) (((char *) page_address(dest)) + offset);
+ dest_buf = page_address(dest) + offset;
/* wait for any prerequisite operations */
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
memset(dest_buf, val, len);
- async_tx_sync_epilog(cb_fn, cb_param);
+ async_tx_sync_epilog(submit);
}
return tx;
}
EXPORT_SYMBOL_GPL(async_memset);
-static int __init async_memset_init(void)
-{
- return 0;
-}
-
-static void __exit async_memset_exit(void)
-{
- do { } while (0);
-}
-
-module_init(async_memset_init);
-module_exit(async_memset_exit);
-
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous memset api");
MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
new file mode 100644
index 00000000000..b88db6d1dc6
--- /dev/null
+++ b/crypto/async_tx/async_pq.c
@@ -0,0 +1,395 @@
+/*
+ * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com>
+ * Copyright(c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/raid/pq.h>
+#include <linux/async_tx.h>
+
+/**
+ * scribble - space to hold throwaway P buffer for synchronous gen_syndrome
+ */
+static struct page *scribble;
+
+static bool is_raid6_zero_block(struct page *p)
+{
+ /* pointer identity test against raid6_empty_zero_page; contents are not read */
+ const void *zero = (void *) raid6_empty_zero_page;
+
+ return zero == (const void *) p;
+}
+
+/* the struct page *blocks[] parameter passed to async_gen_syndrome()
+ * and async_syndrome_val() contains the 'P' destination address at
+ * blocks[disks-2] and the 'Q' destination address at blocks[disks-1]
+ *
+ * note: these are macros as they are used as lvalues
+ */
+#define P(b, d) ((b)[(d)-2]) /* arguments parenthesized; still a valid lvalue */
+#define Q(b, d) ((b)[(d)-1]) /* arguments parenthesized; still a valid lvalue */
+
+/**
+ * do_async_gen_syndrome - asynchronously calculate P and/or Q
+ * @chan: channel whose device prepares the pq descriptors
+ * @blocks: sources at idx 0..disks-3, P @ disks-2 and Q @ disks-1
+ * @scfs: per-source coefficients, indexed like @blocks
+ * @offset: common offset into each page to start the transaction
+ * @disks: number of entries in @blocks, including P and Q
+ * @len: length of the operation in bytes
+ * @dma_src: array that receives the mapped (non-zero-block) source addresses
+ * @submit: submission / completion modifiers; flags, cb_fn, cb_param and
+ * depend_tx are rewritten while the chain of descriptors is built
+ *
+ * Returns the last descriptor submitted, or NULL when every source was the
+ * raid6 zero block and therefore nothing was issued.
+ */
+static __async_inline struct dma_async_tx_descriptor *
+do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
+ const unsigned char *scfs, unsigned int offset, int disks,
+ size_t len, dma_addr_t *dma_src,
+ struct async_submit_ctl *submit)
+{
+ struct dma_async_tx_descriptor *tx = NULL;
+ struct dma_device *dma = chan->device;
+ enum dma_ctrl_flags dma_flags = 0;
+ enum async_tx_flags flags_orig = submit->flags;
+ dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+ /* cb_param is opaque callback data, not a callback function pointer */
+ void *cb_param_orig = submit->cb_param;
+ int src_cnt = disks - 2;
+ unsigned char coefs[src_cnt];
+ unsigned short pq_src_cnt;
+ dma_addr_t dma_dest[2];
+ int src_off = 0;
+ int idx;
+ int i;
+
+ /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
+ if (P(blocks, disks))
+ dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
+ len, DMA_BIDIRECTIONAL);
+ else
+ dma_flags |= DMA_PREP_PQ_DISABLE_P;
+ if (Q(blocks, disks))
+ dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
+ len, DMA_BIDIRECTIONAL);
+ else
+ dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+
+ /* convert source addresses being careful to collapse 'empty'
+ * sources and update the coefficients accordingly
+ */
+ for (i = 0, idx = 0; i < src_cnt; i++) {
+ if (is_raid6_zero_block(blocks[i]))
+ continue;
+ dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
+ DMA_TO_DEVICE);
+ coefs[idx] = scfs[i];
+ idx++;
+ }
+ src_cnt = idx;
+
+ while (src_cnt > 0) {
+ /* every pass starts from the caller's flags */
+ submit->flags = flags_orig;
+ pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags));
+ /* if we are submitting additional pqs, leave the chain open,
+ * clear the callback parameters, and leave the destination
+ * buffers mapped
+ */
+ if (src_cnt > pq_src_cnt) {
+ submit->flags &= ~ASYNC_TX_ACK;
+ submit->flags |= ASYNC_TX_FENCE;
+ dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
+ submit->cb_fn = NULL;
+ submit->cb_param = NULL;
+ } else {
+ /* final descriptor: restore the caller's completion callback */
+ dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
+ submit->cb_fn = cb_fn_orig;
+ submit->cb_param = cb_param_orig;
+ if (cb_fn_orig)
+ dma_flags |= DMA_PREP_INTERRUPT;
+ }
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
+
+ /* Since we have clobbered the src_list we are committed
+ * to doing this asynchronously. Drivers force forward
+ * progress in case they can not provide a descriptor
+ */
+ for (;;) {
+ tx = dma->device_prep_dma_pq(chan, dma_dest,
+ &dma_src[src_off],
+ pq_src_cnt,
+ &coefs[src_off], len,
+ dma_flags);
+ if (likely(tx))
+ break;
+ async_tx_quiesce(&submit->depend_tx);
+ dma_async_issue_pending(chan);
+ }
+
+ async_tx_submit(chan, tx, submit);
+ /* the next pass (if any) depends on the one just submitted */
+ submit->depend_tx = tx;
+
+ /* drop completed sources */
+ src_cnt -= pq_src_cnt;
+ src_off += pq_src_cnt;
+
+ dma_flags |= DMA_PREP_CONTINUE;
+ }
+
+ return tx;
+}
+
+/**
+ * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome
+ * @blocks: pages to operate on; the last two entries are P and Q
+ * @offset: byte offset added to each page's address
+ * @disks: number of entries in @blocks
+ * @len: length in bytes handed to raid6_call.gen_syndrome()
+ * @submit: supplies the optional scribble array and is passed to
+ * async_tx_sync_epilog() once the computation is done
+ *
+ * When submit->scribble is NULL the address list is built in place over
+ * @blocks, i.e. the page pointers are replaced by virtual addresses.
+ */
+static void
+do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
+ size_t len, struct async_submit_ctl *submit)
+{
+ void **srcs;
+ int i;
+
+ if (submit->scribble)
+ srcs = submit->scribble;
+ else
+ srcs = (void **) blocks; /* reuse (and clobber) the caller's array */
+
+ for (i = 0; i < disks; i++) {
+ if (is_raid6_zero_block(blocks[i])) {
+ BUG_ON(i > disks - 3); /* P or Q can't be zero */
+ srcs[i] = blocks[i]; /* zero block is stored as-is, no page_address()/offset */
+ } else
+ srcs[i] = page_address(blocks[i]) + offset;
+ }
+ raid6_call.gen_syndrome(disks, len, srcs);
+ async_tx_sync_epilog(submit);
+}
+
+/**
+ * async_gen_syndrome - asynchronously calculate a raid6 syndrome
+ * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
+ * @offset: common offset into each block (src and dest) to start transaction
+ * @disks: number of blocks (including missing P or Q, see below)
+ * @len: length of operation in bytes
+ * @submit: submission/completion modifiers
+ *
+ * General note: This routine assumes a field of GF(2^8) with a
+ * primitive polynomial of 0x11d and a generator of {02}.
+ *
+ * 'disks' note: callers can optionally omit either P or Q (but not
+ * both) from the calculation by setting blocks[disks-2] or
+ * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <=
+ * PAGE_SIZE as a temporary buffer of this size is used in the
+ * synchronous path. 'disks' always accounts for both destination
+ * buffers.
+ *
+ * 'blocks' note: if submit->scribble is NULL then the contents of
+ * 'blocks' may be overwritten
+ *
+ * Returns the descriptor produced by do_async_gen_syndrome() on the
+ * asynchronous path, or NULL after the synchronous path has run.
+ */
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
+ size_t len, struct async_submit_ctl *submit)
+{
+ int src_cnt = disks - 2;
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+ &P(blocks, disks), 2,
+ blocks, src_cnt, len);
+ struct dma_device *device = chan ? chan->device : NULL;
+ dma_addr_t *dma_src = NULL;
+
+ BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
+
+ /* dma addresses go in the scribble area, or over 'blocks' itself
+ * when a dma_addr_t is no larger than a page pointer
+ */
+ if (submit->scribble)
+ dma_src = submit->scribble;
+ else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+ dma_src = (dma_addr_t *) blocks;
+
+ if (dma_src && device &&
+ (src_cnt <= dma_maxpq(device, 0) ||
+ dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
+ is_dma_pq_aligned(device, offset, 0, len)) {
+ /* run the p+q asynchronously */
+ pr_debug("%s: (async) disks: %d len: %zu\n",
+ __func__, disks, len);
+ return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
+ disks, len, dma_src, submit);
+ }
+
+ /* run the pq synchronously */
+ pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
+
+ /* wait for any prerequisite operations */
+ async_tx_quiesce(&submit->depend_tx);
+
+ /* an omitted P or Q is pointed at the module's throwaway page */
+ if (!P(blocks, disks)) {
+ P(blocks, disks) = scribble;
+ BUG_ON(len + offset > PAGE_SIZE);
+ }
+ if (!Q(blocks, disks)) {
+ Q(blocks, disks) = scribble;
+ BUG_ON(len + offset > PAGE_SIZE);
+ }
+ do_sync_gen_syndrome(blocks, offset, disks, len, submit);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(async_gen_syndrome);
+
+/**
+ * async_syndrome_val - asynchronously validate a raid6 syndrome
+ * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
+ * @offset: common offset into each block (src and dest) to start transaction
+ * @disks: number of blocks (including missing P or Q, see below)
+ * @len: length of operation in bytes
+ * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
+ * @spare: temporary result buffer for the synchronous case
+ * @submit: submission / completion modifiers
+ *
+ * The same notes from async_gen_syndrome apply to the 'blocks',
+ * and 'disks' parameters of this routine. The synchronous path
+ * requires a temporary result buffer and submit->scribble to be
+ * specified.
+ *
+ * Returns the submitted descriptor on the asynchronous path, or NULL
+ * after the synchronous path has completed and filled in @pqres.
+ */
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
+ size_t len, enum sum_check_flags *pqres, struct page *spare,
+ struct async_submit_ctl *submit)
+{
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
+ NULL, 0, blocks, disks,
+ len);
+ struct dma_device *device = chan ? chan->device : NULL;
+ struct dma_async_tx_descriptor *tx;
+ enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+ dma_addr_t *dma_src = NULL;
+
+ BUG_ON(disks < 4);
+
+ if (submit->scribble)
+ dma_src = submit->scribble;
+ else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+ dma_src = (dma_addr_t *) blocks;
+
+ if (dma_src && device && disks <= dma_maxpq(device, 0) &&
+ is_dma_pq_aligned(device, offset, 0, len)) {
+ struct device *dev = device->dev;
+ dma_addr_t *pq = &dma_src[disks-2]; /* mapped P and Q sit after the data sources */
+ int i;
+
+ pr_debug("%s: (async) disks: %d len: %zu\n",
+ __func__, disks, len);
+ if (!P(blocks, disks))
+ dma_flags |= DMA_PREP_PQ_DISABLE_P;
+ if (!Q(blocks, disks))
+ dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
+ for (i = 0; i < disks; i++)
+ if (likely(blocks[i])) { /* NULL entries are left unmapped */
+ BUG_ON(is_raid6_zero_block(blocks[i]));
+ dma_src[i] = dma_map_page(dev, blocks[i],
+ offset, len,
+ DMA_TO_DEVICE);
+ }
+
+ /* retry until the driver hands out a descriptor */
+ for (;;) {
+ tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
+ disks - 2,
+ raid6_gfexp,
+ len, pqres,
+ dma_flags);
+ if (likely(tx))
+ break;
+ async_tx_quiesce(&submit->depend_tx);
+ dma_async_issue_pending(chan);
+ }
+ async_tx_submit(chan, tx, submit);
+
+ return tx;
+ } else {
+ struct page *p_src = P(blocks, disks);
+ struct page *q_src = Q(blocks, disks);
+ enum async_tx_flags flags_orig = submit->flags;
+ dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+ void *scribble = submit->scribble; /* note: shadows the file-scope 'scribble' page */
+ void *cb_param_orig = submit->cb_param;
+ void *p, *q, *s;
+
+ pr_debug("%s: (sync) disks: %d len: %zu\n",
+ __func__, disks, len);
+
+ /* caller must provide a temporary result buffer and
+ * allow the input parameters to be preserved
+ */
+ BUG_ON(!spare || !scribble);
+
+ /* wait for any prerequisite operations */
+ async_tx_quiesce(&submit->depend_tx);
+
+ /* recompute p and/or q into the temporary buffer and then
+ * check to see the result matches the current value
+ */
+ tx = NULL;
+ *pqres = 0;
+ if (p_src) {
+ init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+ NULL, NULL, scribble);
+ tx = async_xor(spare, blocks, offset, disks-2, len, submit);
+ async_tx_quiesce(&tx);
+ p = page_address(p_src) + offset;
+ s = page_address(spare) + offset;
+ *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
+ }
+
+ if (q_src) {
+ /* generate only Q, into 'spare', by temporarily omitting P */
+ P(blocks, disks) = NULL;
+ Q(blocks, disks) = spare;
+ init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
+ tx = async_gen_syndrome(blocks, offset, disks, len, submit);
+ async_tx_quiesce(&tx);
+ q = page_address(q_src) + offset;
+ s = page_address(spare) + offset;
+ *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
+ }
+
+ /* restore P, Q and submit */
+ P(blocks, disks) = p_src;
+ Q(blocks, disks) = q_src;
+
+ submit->cb_fn = cb_fn_orig;
+ submit->cb_param = cb_param_orig;
+ submit->flags = flags_orig;
+ async_tx_sync_epilog(submit);
+
+ return NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(async_syndrome_val);
+
+static int __init async_pq_init(void)
+{
+ /* allocate the throwaway page used by the synchronous gen_syndrome path */
+ scribble = alloc_page(GFP_KERNEL);
+ if (!scribble) {
+ pr_err("%s: failed to allocate required spare page\n", __func__);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void __exit async_pq_exit(void)
+{
+ put_page(scribble); /* drop the reference on the page obtained in async_pq_init() */
+}
+
+module_init(async_pq_init);
+module_exit(async_pq_exit);
+
+MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation");
+MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
new file mode 100644
index 00000000000..6d73dde4786
--- /dev/null
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -0,0 +1,468 @@
+/*
+ * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
+ * Copyright(c) 2009 Intel Corporation
+ *
+ * based on raid6recov.c:
+ * Copyright 2002 H. Peter Anvin
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/raid/pq.h>
+#include <linux/async_tx.h>
+/*
+ * async_sum_product - combine two source pages into 'dest' with per-source
+ * multipliers: each output byte is
+ * raid6_gfmul[coef[0]][a] ^ raid6_gfmul[coef[1]][b].
+ *
+ * The operation always starts at page offset 0. A DMA_PQ channel is tried
+ * first (with P disabled and 'dest' mapped as the Q destination); if no
+ * channel or descriptor is available the result is computed by the cpu.
+ *
+ * Returns the submitted descriptor, or NULL when the work was done
+ * synchronously. The synchronous path does not call async_tx_sync_epilog(),
+ * so submit->cb_fn is not invoked there (callers in this file pass NULL).
+ */
+static struct dma_async_tx_descriptor *
+async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
+ size_t len, struct async_submit_ctl *submit)
+{
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+ &dest, 1, srcs, 2, len);
+ struct dma_device *dma = chan ? chan->device : NULL;
+ const u8 *amul, *bmul;
+ u8 ax, bx;
+ u8 *a, *b, *c;
+
+ if (dma) {
+ dma_addr_t dma_dest[2];
+ dma_addr_t dma_src[2];
+ struct device *dev = dma->dev;
+ struct dma_async_tx_descriptor *tx;
+ enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
+ dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); /* slot 0 (P) stays unset, P is disabled */
+ dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
+ dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
+ len, dma_flags);
+ if (tx) {
+ async_tx_submit(chan, tx, submit);
+ return tx;
+ }
+
+ /* could not get a descriptor, unmap and fall through to
+ * the synchronous path
+ */
+ dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
+ dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+ dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
+ }
+
+ /* run the operation synchronously */
+ async_tx_quiesce(&submit->depend_tx);
+ amul = raid6_gfmul[coef[0]];
+ bmul = raid6_gfmul[coef[1]];
+ a = page_address(srcs[0]);
+ b = page_address(srcs[1]);
+ c = page_address(dest);
+
+ while (len--) {
+ ax = amul[*a++];
+ bx = bmul[*b++];
+ *c++ = ax ^ bx;
+ }
+
+ return NULL;
+}
+/*
+ * async_mult - fill 'dest' with 'src' multiplied by 'coef': each output
+ * byte is raid6_gfmul[coef][s].
+ *
+ * The operation always starts at page offset 0. A DMA_PQ channel is tried
+ * first (P disabled, 'dest' mapped as the Q destination, one source); the
+ * cpu loop is used when no channel or descriptor is available.
+ *
+ * Returns the submitted descriptor, or NULL when the work was done
+ * synchronously. The synchronous path does not call async_tx_sync_epilog().
+ */
+static struct dma_async_tx_descriptor *
+async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
+ struct async_submit_ctl *submit)
+{
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+ &dest, 1, &src, 1, len);
+ struct dma_device *dma = chan ? chan->device : NULL;
+ const u8 *qmul; /* Q multiplier table */
+ u8 *d, *s;
+
+ if (dma) {
+ dma_addr_t dma_dest[2];
+ dma_addr_t dma_src[1];
+ struct device *dev = dma->dev;
+ struct dma_async_tx_descriptor *tx;
+ enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
+ dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); /* slot 0 (P) stays unset, P is disabled */
+ dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
+ len, dma_flags);
+ if (tx) {
+ async_tx_submit(chan, tx, submit);
+ return tx;
+ }
+
+ /* could not get a descriptor, unmap and fall through to
+ * the synchronous path
+ */
+ dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
+ dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+ }
+
+ /* no channel available, or failed to allocate a descriptor, so
+ * perform the operation synchronously
+ */
+ async_tx_quiesce(&submit->depend_tx);
+ qmul = raid6_gfmul[coef];
+ d = page_address(dest);
+ s = page_address(src);
+
+ while (len--)
+ *d++ = qmul[*s++];
+
+ return NULL;
+}
+/*
+ * __2data_recov_4 - recover both data blocks of a 4 entry 'blocks' array
+ * (two data pages followed by P and Q).
+ *
+ * Two chained operations are issued: a sum-product of P and Q into
+ * blocks[failb], then an xor of P with that result into blocks[faila].
+ * The caller's flags and callback are applied only to the final xor.
+ * Returns the descriptor returned by that final async_xor() call.
+ */
+static struct dma_async_tx_descriptor *
+__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
+ struct async_submit_ctl *submit)
+{
+ struct dma_async_tx_descriptor *tx = NULL;
+ struct page *p, *q, *a, *b;
+ struct page *srcs[2];
+ unsigned char coef[2];
+ enum async_tx_flags flags = submit->flags; /* saved: submit is re-initialised per step */
+ dma_async_tx_callback cb_fn = submit->cb_fn;
+ void *cb_param = submit->cb_param;
+ void *scribble = submit->scribble;
+
+ p = blocks[4-2];
+ q = blocks[4-1];
+
+ a = blocks[faila];
+ b = blocks[failb];
+
+ /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
+ /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+ srcs[0] = p;
+ srcs[1] = q;
+ coef[0] = raid6_gfexi[failb-faila];
+ coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_sum_product(b, srcs, coef, bytes, submit);
+
+ /* Dy = P+Pxy+Dx */
+ srcs[0] = p;
+ srcs[1] = b;
+ init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
+ cb_param, scribble);
+ tx = async_xor(a, srcs, 0, 2, bytes, submit);
+
+ return tx;
+
+}
+/*
+ * __2data_recov_5 - recover two data blocks of a 5 entry 'blocks' array
+ * (three data pages followed by P and Q).
+ *
+ * The one surviving data page is located first; the failed pages are then
+ * used as scratch space for delta-P and delta-Q while a chain of
+ * memcpy / mult / xor / sum-product operations is issued. The caller's
+ * flags and callback are applied only to the final xor, whose return value
+ * is returned.
+ */
+static struct dma_async_tx_descriptor *
+__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
+ struct async_submit_ctl *submit)
+{
+ struct dma_async_tx_descriptor *tx = NULL;
+ struct page *p, *q, *g, *dp, *dq;
+ struct page *srcs[2];
+ unsigned char coef[2];
+ enum async_tx_flags flags = submit->flags; /* saved: submit is re-initialised per step */
+ dma_async_tx_callback cb_fn = submit->cb_fn;
+ void *cb_param = submit->cb_param;
+ void *scribble = submit->scribble;
+ int uninitialized_var(good);
+ int i;
+
+ /* find the first data index (0..2) that is not one of the failed ones */
+ for (i = 0; i < 3; i++) {
+ if (i == faila || i == failb)
+ continue;
+ else {
+ good = i;
+ break;
+ }
+ }
+ BUG_ON(i >= 3);
+
+ p = blocks[5-2];
+ q = blocks[5-1];
+ g = blocks[good];
+
+ /* Compute syndrome with zero for the missing data pages
+ * Use the dead data pages as temporary storage for delta p and
+ * delta q
+ */
+ dp = blocks[faila];
+ dq = blocks[failb];
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_memcpy(dp, g, 0, 0, bytes, submit);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+
+ /* compute P + Pxy */
+ srcs[0] = dp;
+ srcs[1] = p;
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
+ tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+ /* compute Q + Qxy */
+ srcs[0] = dq;
+ srcs[1] = q;
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
+ tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+ /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+ srcs[0] = dp;
+ srcs[1] = dq;
+ coef[0] = raid6_gfexi[failb-faila];
+ coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_sum_product(dq, srcs, coef, bytes, submit);
+
+ /* Dy = P+Pxy+Dx */
+ srcs[0] = dp;
+ srcs[1] = dq;
+ init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+ cb_param, scribble);
+ tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+ return tx;
+}
+/*
+ * __2data_recov_n - recover two data blocks in the general case.
+ *
+ * 'blocks' is temporarily rewritten (failed entries replaced by the raid6
+ * zero page, P/Q slots pointed at the failed pages) for a syndrome
+ * generation pass, then restored before the xor / sum-product chain is
+ * issued. The caller's flags and callback are applied only to the final
+ * xor, whose return value is returned.
+ */
+static struct dma_async_tx_descriptor *
+__2data_recov_n(int disks, size_t bytes, int faila, int failb,
+ struct page **blocks, struct async_submit_ctl *submit)
+{
+ struct dma_async_tx_descriptor *tx = NULL;
+ struct page *p, *q, *dp, *dq;
+ struct page *srcs[2];
+ unsigned char coef[2];
+ enum async_tx_flags flags = submit->flags; /* saved: submit is re-initialised per step */
+ dma_async_tx_callback cb_fn = submit->cb_fn;
+ void *cb_param = submit->cb_param;
+ void *scribble = submit->scribble;
+
+ p = blocks[disks-2];
+ q = blocks[disks-1];
+
+ /* Compute syndrome with zero for the missing data pages
+ * Use the dead data pages as temporary storage for
+ * delta p and delta q
+ */
+ dp = blocks[faila];
+ blocks[faila] = (void *)raid6_empty_zero_page;
+ blocks[disks-2] = dp;
+ dq = blocks[failb];
+ blocks[failb] = (void *)raid6_empty_zero_page;
+ blocks[disks-1] = dq;
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
+
+ /* Restore pointer table */
+ blocks[faila] = dp;
+ blocks[failb] = dq;
+ blocks[disks-2] = p;
+ blocks[disks-1] = q;
+
+ /* compute P + Pxy */
+ srcs[0] = dp;
+ srcs[1] = p;
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
+ tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+ /* compute Q + Qxy */
+ srcs[0] = dq;
+ srcs[1] = q;
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
+ tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+ /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+ srcs[0] = dp;
+ srcs[1] = dq;
+ coef[0] = raid6_gfexi[failb-faila];
+ coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_sum_product(dq, srcs, coef, bytes, submit);
+
+ /* Dy = P+Pxy+Dx */
+ srcs[0] = dp;
+ srcs[1] = dq;
+ init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+ cb_param, scribble);
+ tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+ return tx;
+}
+
+/**
+ * async_raid6_2data_recov - asynchronously calculate two missing data blocks
+ * @disks: number of disks in the RAID-6 array
+ * @bytes: block size
+ * @faila: first failed drive index
+ * @failb: second failed drive index
+ * @blocks: array of source pointers where the last two entries are p and q
+ * @submit: submission/completion modifiers
+ *
+ * @faila and @failb may be given in either order but must differ.
+ * Without submit->scribble the recovery is done synchronously, 'blocks'
+ * is overwritten with page addresses, and NULL is returned. Otherwise
+ * the return value of the matching __2data_recov_* helper is returned.
+ */
+struct dma_async_tx_descriptor *
+async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+ struct page **blocks, struct async_submit_ctl *submit)
+{
+ BUG_ON(faila == failb);
+ if (failb < faila)
+ swap(faila, failb); /* helpers compute failb-faila, so keep faila < failb */
+
+ pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
+
+ /* we need to preserve the contents of 'blocks' for the async
+ * case, so punt to synchronous if a scribble buffer is not available
+ */
+ if (!submit->scribble) {
+ void **ptrs = (void **) blocks;
+ int i;
+
+ async_tx_quiesce(&submit->depend_tx);
+ for (i = 0; i < disks; i++)
+ ptrs[i] = page_address(blocks[i]);
+
+ raid6_2data_recov(disks, bytes, faila, failb, ptrs);
+
+ async_tx_sync_epilog(submit);
+
+ return NULL;
+ }
+
+ switch (disks) {
+ case 4:
+ /* dma devices do not uniformly understand a zero source pq
+ * operation (in contrast to the synchronous case), so
+ * explicitly handle the 4 disk special case
+ */
+ return __2data_recov_4(bytes, faila, failb, blocks, submit);
+ case 5:
+ /* dma devices do not uniformly understand a single
+ * source pq operation (in contrast to the synchronous
+ * case), so explicitly handle the 5 disk special case
+ */
+ return __2data_recov_5(bytes, faila, failb, blocks, submit);
+ default:
+ return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
+ }
+}
+EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
+
+/**
+ * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
+ * @disks: number of disks in the RAID-6 array
+ * @bytes: block size
+ * @faila: failed drive index
+ * @blocks: array of source pointers where the last two entries are p and q
+ * @submit: submission/completion modifiers
+ *
+ * Without submit->scribble the recovery is done synchronously, 'blocks'
+ * is overwritten with page addresses, and NULL is returned. Otherwise
+ * a chain of operations is issued, the caller's flags and callback are
+ * applied only to the final xor, and that call's return value is returned.
+ */
+struct dma_async_tx_descriptor *
+async_raid6_datap_recov(int disks, size_t bytes, int faila,
+ struct page **blocks, struct async_submit_ctl *submit)
+{
+ struct dma_async_tx_descriptor *tx = NULL;
+ struct page *p, *q, *dq;
+ u8 coef;
+ enum async_tx_flags flags = submit->flags; /* saved: submit is re-initialised per step */
+ dma_async_tx_callback cb_fn = submit->cb_fn;
+ void *cb_param = submit->cb_param;
+ void *scribble = submit->scribble;
+ struct page *srcs[2];
+
+ pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
+
+ /* we need to preserve the contents of 'blocks' for the async
+ * case, so punt to synchronous if a scribble buffer is not available
+ */
+ if (!scribble) {
+ void **ptrs = (void **) blocks;
+ int i;
+
+ async_tx_quiesce(&submit->depend_tx);
+ for (i = 0; i < disks; i++)
+ ptrs[i] = page_address(blocks[i]);
+
+ raid6_datap_recov(disks, bytes, faila, ptrs);
+
+ async_tx_sync_epilog(submit);
+
+ return NULL;
+ }
+
+ p = blocks[disks-2];
+ q = blocks[disks-1];
+
+ /* Compute syndrome with zero for the missing data page
+ * Use the dead data page as temporary storage for delta q
+ */
+ dq = blocks[faila];
+ blocks[faila] = (void *)raid6_empty_zero_page;
+ blocks[disks-1] = dq;
+
+ /* in the 4 disk case we only need to perform a single source
+ * multiplication
+ */
+ if (disks == 4) {
+ int good = faila == 0 ? 1 : 0; /* the other of the two data indices */
+ struct page *g = blocks[good];
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
+ tx = async_memcpy(p, g, 0, 0, bytes, submit);
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
+ tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+ } else {
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
+ tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
+ }
+
+ /* Restore pointer table */
+ blocks[faila] = dq;
+ blocks[disks-1] = q;
+
+ /* calculate g^{-faila} */
+ coef = raid6_gfinv[raid6_gfexp[faila]];
+
+ /* dq = dq + q, dropping dq as an explicit source of the xor */
+ srcs[0] = dq;
+ srcs[1] = q;
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
+ tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+ /* scale dq in place by coef */
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_mult(dq, dq, coef, bytes, submit);
+
+ /* fold the recovered data into p; the caller's callback fires here */
+ srcs[0] = p;
+ srcs[1] = dq;
+ init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+ cb_param, scribble);
+ tx = async_xor(p, srcs, 0, 2, bytes, submit);
+
+ return tx;
+}
+EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
+
+MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
+MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
+MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 06eb6cc09fe..f9cdf04fe7c 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -42,16 +42,21 @@ static void __exit async_tx_exit(void)
async_dmaengine_put();
}
+module_init(async_tx_init);
+module_exit(async_tx_exit);
+
/**
* __async_tx_find_channel - find a channel to carry out the operation or let
* the transaction execute synchronously
- * @depend_tx: transaction dependency
+ * @submit: transaction dependency and submission modifiers
* @tx_type: transaction type
*/
struct dma_chan *
-__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
- enum dma_transaction_type tx_type)
+__async_tx_find_channel(struct async_submit_ctl *submit,
+ enum dma_transaction_type tx_type)
{
+ struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
/* see if we can keep the chain on one channel */
if (depend_tx &&
dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
@@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
return async_dma_find_channel(tx_type);
}
EXPORT_SYMBOL_GPL(__async_tx_find_channel);
-#else
-static int __init async_tx_init(void)
-{
- printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
- return 0;
-}
-
-static void __exit async_tx_exit(void)
-{
- do { } while (0);
-}
#endif
@@ -83,10 +77,14 @@ static void
async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
struct dma_async_tx_descriptor *tx)
{
- struct dma_chan *chan;
- struct dma_device *device;
+ struct dma_chan *chan = depend_tx->chan;
+ struct dma_device *device = chan->device;
struct dma_async_tx_descriptor *intr_tx = (void *) ~0;
+ #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
+ BUG();
+ #endif
+
/* first check to see if we can still append to depend_tx */
spin_lock_bh(&depend_tx->lock);
if (depend_tx->parent && depend_tx->chan == tx->chan) {
@@ -96,11 +94,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
}
spin_unlock_bh(&depend_tx->lock);
- if (!intr_tx)
+ /* attached dependency, flush the parent channel */
+ if (!intr_tx) {
+ device->device_issue_pending(chan);
return;
-
- chan = depend_tx->chan;
- device = chan->device;
+ }
/* see if we can schedule an interrupt
* otherwise poll for completion
@@ -134,6 +132,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
intr_tx->tx_submit(intr_tx);
async_tx_ack(intr_tx);
}
+ device->device_issue_pending(chan);
} else {
if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
panic("%s: DMA_ERROR waiting for depend_tx\n",
@@ -144,13 +143,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
/**
- * submit_disposition - while holding depend_tx->lock we must avoid submitting
- * new operations to prevent a circular locking dependency with
- * drivers that already hold a channel lock when calling
- * async_tx_run_dependencies.
+ * submit_disposition - flags for routing an incoming operation
* @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
* @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
* @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
+ *
+ * while holding depend_tx->lock we must avoid submitting new operations
+ * to prevent a circular locking dependency with drivers that already
+ * hold a channel lock when calling async_tx_run_dependencies.
*/
enum submit_disposition {
ASYNC_TX_SUBMITTED,
@@ -160,11 +160,12 @@ enum submit_disposition {
void
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
- enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+ struct async_submit_ctl *submit)
{
- tx->callback = cb_fn;
- tx->callback_param = cb_param;
+ struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
+ tx->callback = submit->cb_fn;
+ tx->callback_param = submit->cb_param;
if (depend_tx) {
enum submit_disposition s;
@@ -220,30 +221,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
tx->tx_submit(tx);
}
- if (flags & ASYNC_TX_ACK)
+ if (submit->flags & ASYNC_TX_ACK)
async_tx_ack(tx);
- if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
+ if (depend_tx)
async_tx_ack(depend_tx);
}
EXPORT_SYMBOL_GPL(async_tx_submit);
/**
- * async_trigger_callback - schedules the callback function to be run after
- * any dependent operations have been completed.
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: 'callback' requires the completion of this transaction
- * @cb_fn: function to call after depend_tx completes
- * @cb_param: parameter to pass to the callback routine
+ * async_trigger_callback - schedules the callback function to be run
+ * @submit: submission and completion parameters
+ *
+ * honored flags: ASYNC_TX_ACK
+ *
+ * The callback is run after any dependent operations have completed.
*/
struct dma_async_tx_descriptor *
-async_trigger_callback(enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+async_trigger_callback(struct async_submit_ctl *submit)
{
struct dma_chan *chan;
struct dma_device *device;
struct dma_async_tx_descriptor *tx;
+ struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
if (depend_tx) {
chan = depend_tx->chan;
@@ -262,14 +262,14 @@ async_trigger_callback(enum async_tx_flags flags,
if (tx) {
pr_debug("%s: (async)\n", __func__);
- async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ async_tx_submit(chan, tx, submit);
} else {
pr_debug("%s: (sync)\n", __func__);
/* wait for any prerequisite operations */
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
- async_tx_sync_epilog(cb_fn, cb_param);
+ async_tx_sync_epilog(submit);
}
return tx;
@@ -295,9 +295,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
}
EXPORT_SYMBOL_GPL(async_tx_quiesce);
-module_init(async_tx_init);
-module_exit(async_tx_exit);
-
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 90dd3f8bd28..b459a9034aa 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -33,19 +33,16 @@
/* do_async_xor - dma map the pages and perform the xor with an engine */
static __async_inline struct dma_async_tx_descriptor *
do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
- unsigned int offset, int src_cnt, size_t len,
- enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+ unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
+ struct async_submit_ctl *submit)
{
struct dma_device *dma = chan->device;
- dma_addr_t *dma_src = (dma_addr_t *) src_list;
struct dma_async_tx_descriptor *tx = NULL;
int src_off = 0;
int i;
- dma_async_tx_callback _cb_fn;
- void *_cb_param;
- enum async_tx_flags async_flags;
+ dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+ void *cb_param_orig = submit->cb_param;
+ enum async_tx_flags flags_orig = submit->flags;
enum dma_ctrl_flags dma_flags;
int xor_src_cnt;
dma_addr_t dma_dest;
@@ -63,25 +60,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
}
while (src_cnt) {
- async_flags = flags;
+ submit->flags = flags_orig;
dma_flags = 0;
- xor_src_cnt = min(src_cnt, dma->max_xor);
+ xor_src_cnt = min(src_cnt, (int)dma->max_xor);
/* if we are submitting additional xors, leave the chain open,
* clear the callback parameters, and leave the destination
* buffer mapped
*/
if (src_cnt > xor_src_cnt) {
- async_flags &= ~ASYNC_TX_ACK;
+ submit->flags &= ~ASYNC_TX_ACK;
+ submit->flags |= ASYNC_TX_FENCE;
dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
- _cb_fn = NULL;
- _cb_param = NULL;
+ submit->cb_fn = NULL;
+ submit->cb_param = NULL;
} else {
- _cb_fn = cb_fn;
- _cb_param = cb_param;
+ submit->cb_fn = cb_fn_orig;
+ submit->cb_param = cb_param_orig;
}
- if (_cb_fn)
+ if (submit->cb_fn)
dma_flags |= DMA_PREP_INTERRUPT;
-
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
/* Since we have clobbered the src_list we are committed
* to doing this asynchronously. Drivers force forward progress
* in case they can not provide a descriptor
@@ -90,7 +89,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
xor_src_cnt, len, dma_flags);
if (unlikely(!tx))
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
/* spin wait for the preceeding transactions to complete */
while (unlikely(!tx)) {
@@ -101,11 +100,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
dma_flags);
}
- async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn,
- _cb_param);
-
- depend_tx = tx;
- flags |= ASYNC_TX_DEP_ACK;
+ async_tx_submit(chan, tx, submit);
+ submit->depend_tx = tx;
if (src_cnt > xor_src_cnt) {
/* drop completed sources */
@@ -124,23 +120,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
static void
do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
- int src_cnt, size_t len, enum async_tx_flags flags,
- dma_async_tx_callback cb_fn, void *cb_param)
+ int src_cnt, size_t len, struct async_submit_ctl *submit)
{
int i;
int xor_src_cnt;
int src_off = 0;
void *dest_buf;
- void **srcs = (void **) src_list;
+ void **srcs;
+
+ if (submit->scribble)
+ srcs = submit->scribble;
+ else
+ srcs = (void **) src_list;
- /* reuse the 'src_list' array to convert to buffer pointers */
+ /* convert to buffer pointers */
for (i = 0; i < src_cnt; i++)
srcs[i] = page_address(src_list[i]) + offset;
/* set destination address */
dest_buf = page_address(dest) + offset;
- if (flags & ASYNC_TX_XOR_ZERO_DST)
+ if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
memset(dest_buf, 0, len);
while (src_cnt > 0) {
@@ -153,61 +153,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
src_off += xor_src_cnt;
}
- async_tx_sync_epilog(cb_fn, cb_param);
+ async_tx_sync_epilog(submit);
}
/**
* async_xor - attempt to xor a set of blocks with a dma engine.
- * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
- * flag must be set to not include dest data in the calculation. The
- * assumption with dma eninges is that they only use the destination
- * buffer as a source when it is explicity specified in the source list.
* @dest: destination page
- * @src_list: array of source pages (if the dest is also a source it must be
- * at index zero). The contents of this array may be overwritten.
- * @offset: offset in pages to start transaction
+ * @src_list: array of source pages
+ * @offset: common src/dst offset to start transaction
* @src_cnt: number of source pages
* @len: length in bytes
- * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
- * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: xor depends on the result of this transaction.
- * @cb_fn: function to call when the xor completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
+ *
+ * xor_blocks always uses the dest as a source so the
+ * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
+ * the calculation. The assumption with dma engines is that they only
+ * use the destination buffer as a source when it is explicitly specified
+ * in the source list.
+ *
+ * src_list note: if the dest is also a source it must be at index zero.
+ * The contents of this array will be overwritten if a scribble region
+ * is not specified.
*/
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
- int src_cnt, size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+ int src_cnt, size_t len, struct async_submit_ctl *submit)
{
- struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
&dest, 1, src_list,
src_cnt, len);
+ dma_addr_t *dma_src = NULL;
+
BUG_ON(src_cnt <= 1);
- if (chan) {
+ if (submit->scribble)
+ dma_src = submit->scribble;
+ else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+ dma_src = (dma_addr_t *) src_list;
+
+ if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
/* run the xor asynchronously */
pr_debug("%s (async): len: %zu\n", __func__, len);
return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
- flags, depend_tx, cb_fn, cb_param);
+ dma_src, submit);
} else {
/* run the xor synchronously */
pr_debug("%s (sync): len: %zu\n", __func__, len);
+ WARN_ONCE(chan, "%s: no space for dma address conversion\n",
+ __func__);
/* in the sync case the dest is an implied source
* (assumes the dest is the first source)
*/
- if (flags & ASYNC_TX_XOR_DROP_DST) {
+ if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
src_cnt--;
src_list++;
}
/* wait for any prerequisite operations */
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
- do_sync_xor(dest, src_list, offset, src_cnt, len,
- flags, cb_fn, cb_param);
+ do_sync_xor(dest, src_list, offset, src_cnt, len, submit);
return NULL;
}
@@ -222,104 +231,94 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
}
/**
- * async_xor_zero_sum - attempt a xor parity check with a dma engine.
+ * async_xor_val - attempt a xor parity check with a dma engine.
* @dest: destination page used if the xor is performed synchronously
- * @src_list: array of source pages. The dest page must be listed as a source
- * at index zero. The contents of this array may be overwritten.
+ * @src_list: array of source pages
* @offset: offset in pages to start transaction
* @src_cnt: number of source pages
* @len: length in bytes
* @result: 0 if sum == 0 else non-zero
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: xor depends on the result of this transaction.
- * @cb_fn: function to call when the xor completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
+ *
+ * src_list note: if the dest is also a source it must be at index zero.
+ * The contents of this array will be overwritten if a scribble region
+ * is not specified.
*/
struct dma_async_tx_descriptor *
-async_xor_zero_sum(struct page *dest, struct page **src_list,
- unsigned int offset, int src_cnt, size_t len,
- u32 *result, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
+ int src_cnt, size_t len, enum sum_check_flags *result,
+ struct async_submit_ctl *submit)
{
- struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM,
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL,
&dest, 1, src_list,
src_cnt, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
+ dma_addr_t *dma_src = NULL;
BUG_ON(src_cnt <= 1);
- if (device && src_cnt <= device->max_xor) {
- dma_addr_t *dma_src = (dma_addr_t *) src_list;
- unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+ if (submit->scribble)
+ dma_src = submit->scribble;
+ else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+ dma_src = (dma_addr_t *) src_list;
+
+ if (dma_src && device && src_cnt <= device->max_xor &&
+ is_dma_xor_aligned(device, offset, 0, len)) {
+ unsigned long dma_prep_flags = 0;
int i;
pr_debug("%s: (async) len: %zu\n", __func__, len);
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
for (i = 0; i < src_cnt; i++)
dma_src[i] = dma_map_page(device->dev, src_list[i],
offset, len, DMA_TO_DEVICE);
- tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
- len, result,
- dma_prep_flags);
+ tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
+ len, result,
+ dma_prep_flags);
if (unlikely(!tx)) {
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
while (!tx) {
dma_async_issue_pending(chan);
- tx = device->device_prep_dma_zero_sum(chan,
+ tx = device->device_prep_dma_xor_val(chan,
dma_src, src_cnt, len, result,
dma_prep_flags);
}
}
- async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ async_tx_submit(chan, tx, submit);
} else {
- unsigned long xor_flags = flags;
+ enum async_tx_flags flags_orig = submit->flags;
pr_debug("%s: (sync) len: %zu\n", __func__, len);
+ WARN_ONCE(device && src_cnt <= device->max_xor,
+ "%s: no space for dma address conversion\n",
+ __func__);
- xor_flags |= ASYNC_TX_XOR_DROP_DST;
- xor_flags &= ~ASYNC_TX_ACK;
+ submit->flags |= ASYNC_TX_XOR_DROP_DST;
+ submit->flags &= ~ASYNC_TX_ACK;
- tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
- depend_tx, NULL, NULL);
+ tx = async_xor(dest, src_list, offset, src_cnt, len, submit);
async_tx_quiesce(&tx);
- *result = page_is_zero(dest, offset, len) ? 0 : 1;
+ *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P;
- async_tx_sync_epilog(cb_fn, cb_param);
+ async_tx_sync_epilog(submit);
+ submit->flags = flags_orig;
}
return tx;
}
-EXPORT_SYMBOL_GPL(async_xor_zero_sum);
-
-static int __init async_xor_init(void)
-{
- #ifdef CONFIG_ASYNC_TX_DMA
- /* To conserve stack space the input src_list (array of page pointers)
- * is reused to hold the array of dma addresses passed to the driver.
- * This conversion is only possible when dma_addr_t is less than the
- * the size of a pointer. HIGHMEM64G is known to violate this
- * assumption.
- */
- BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
- #endif
-
- return 0;
-}
-
-static void __exit async_xor_exit(void)
-{
- do { } while (0);
-}
-
-module_init(async_xor_init);
-module_exit(async_xor_exit);
+EXPORT_SYMBOL_GPL(async_xor_val);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
new file mode 100644
index 00000000000..3ec27c7e62e
--- /dev/null
+++ b/crypto/async_tx/raid6test.c
@@ -0,0 +1,240 @@
+/*
+ * asynchronous raid6 recovery self test
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * based on drivers/md/raid6test/test.c:
+ * Copyright 2002-2007 H. Peter Anvin
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/async_tx.h>
+#include <linux/random.h>
+
+#undef pr
+#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
+
+#define NDISKS 16 /* Including P and Q */
+
+static struct page *dataptrs[NDISKS];
+static addr_conv_t addr_conv[NDISKS];
+static struct page *data[NDISKS+3];
+static struct page *spare;
+static struct page *recovi;
+static struct page *recovj;
+
+/* Completion hook passed to the async_tx submit helpers: @param is the
+ * struct completion the submitting thread is waiting on.
+ */
+static void callback(void *param)
+{
+	complete((struct completion *)param);
+}
+
+/* Fill the first @disks pages of data[] with random words and point the
+ * matching dataptrs[] slots at them.
+ */
+static void makedata(int disks)
+{
+	int i, j;
+
+	for (i = 0; i < disks; i++) {
+		/* index in u32 units so that the whole page is covered;
+		 * stepping a byte offset by sizeof(u32) up to
+		 * PAGE_SIZE/sizeof(u32) only reaches the first quarter
+		 */
+		u32 *words = page_address(data[i]);
+
+		for (j = 0; j < PAGE_SIZE/sizeof(u32); j++)
+			words[j] = random32();
+
+		dataptrs[i] = data[i];
+	}
+}
+
+/* Classify slot @d of a @disks-member array: the second-to-last slot is
+ * reported as 'P', the last as 'Q', anything else as 'D'.
+ */
+static char disk_type(int d, int disks)
+{
+	const int p_slot = disks - 2;
+	const int q_slot = disks - 1;
+
+	if (d == p_slot)
+		return 'P';
+
+	return d == q_slot ? 'Q' : 'D';
+}
+
+/* Recover two failed blocks.
+ *
+ * Rebuilds slots @faila and @failb of @ptrs (an array of @disks pages in
+ * which slot disks-2 is treated as P and slot disks-1 as Q) through the
+ * async_tx API, then queues a syndrome validation behind the recovery and
+ * waits up to 3 seconds for it. Timeouts and validation failures are only
+ * logged; nothing is returned to the caller.
+ */
+static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs)
+{
+ struct async_submit_ctl submit;
+ struct completion cmp;
+ struct dma_async_tx_descriptor *tx = NULL;
+ /* start with every bit set so a validation that never writes its
+ * result is reported as a failure by the check at the end
+ */
+ enum sum_check_flags result = ~0;
+
+ /* the branches below rely on faila < failb */
+ if (faila > failb)
+ swap(faila, failb);
+
+ if (failb == disks-1) {
+ if (faila == disks-2) {
+ /* P+Q failure. Just rebuild the syndrome. */
+ init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+ tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
+ } else {
+ struct page *blocks[disks];
+ struct page *dest;
+ int count = 0;
+ int i;
+
+ /* data+Q failure. Reconstruct data from P,
+ * then rebuild syndrome
+ */
+ /* walk slots disks-1 .. 0 collecting every survivor
+ * (P included, since only faila and failb are skipped)
+ */
+ for (i = disks; i-- ; ) {
+ if (i == faila || i == failb)
+ continue;
+ blocks[count++] = ptrs[i];
+ }
+ dest = ptrs[faila];
+ init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+ NULL, NULL, addr_conv);
+ tx = async_xor(dest, blocks, 0, count, bytes, &submit);
+
+ /* pass the xor descriptor along so the syndrome rebuild is
+ * presumably ordered after it - confirm against
+ * init_async_submit()
+ */
+ init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv);
+ tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
+ }
+ } else {
+ if (failb == disks-2) {
+ /* data+P failure. */
+ init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+ tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
+ } else {
+ /* data+data failure. */
+ init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+ tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
+ }
+ }
+ /* validate the recovered stripe; callback() completes cmp when the
+ * validation operation finishes
+ */
+ init_completion(&cmp);
+ init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv);
+ tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit);
+ async_tx_issue_pending(tx);
+
+ /* NOTE(review): on timeout this function still returns, leaving
+ * cmp and result (both on this stack frame) referenced by the
+ * outstanding operation - confirm whether that can fire later
+ */
+ if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0)
+ pr("%s: timeout! (faila: %d failb: %d disks: %d)\n",
+ __func__, faila, failb, disks);
+
+ if (result != 0)
+ pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n",
+ __func__, faila, failb, result);
+}
+
+/* Simulate the loss of slots @i and @j in a @disks-member array: swap
+ * poisoned scratch pages into dataptrs[], run the recovery, compare the
+ * result with the pristine pages in data[] and restore dataptrs[].
+ *
+ * Returns non-zero when either recovered page differs from the original.
+ */
+static int test_disks(int i, int j, int disks)
+{
+	const char *verdict;
+	int erra, errb;
+
+	/* distinct fill patterns so stale scratch contents never match */
+	memset(page_address(recovi), 0xf0, PAGE_SIZE);
+	memset(page_address(recovj), 0xba, PAGE_SIZE);
+
+	dataptrs[i] = recovi;
+	dataptrs[j] = recovj;
+
+	raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs);
+
+	erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE);
+	errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE);
+
+	if (!erra && !errb)
+		verdict = "OK";
+	else if (!erra)
+		verdict = "ERRB";
+	else if (!errb)
+		verdict = "ERRA";
+	else
+		verdict = "ERRAB";
+
+	pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n",
+	   __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks),
+	   verdict);
+
+	/* put the original pages back for the next combination */
+	dataptrs[i] = data[i];
+	dataptrs[j] = data[j];
+
+	return erra || errb;
+}
+
+/* Exercise every two-slot failure combination of a @disks-member array
+ * (P and Q included). *@tests is bumped once per combination.
+ *
+ * Returns the number of combinations that failed, or 1 if the initial
+ * syndrome generation timed out.
+ */
+static int test(int disks, int *tests)
+{
+	struct dma_async_tx_descriptor *tx;
+	struct async_submit_ctl submit;
+	struct completion cmp;
+	int failures = 0;
+	int a, b;
+
+	/* the three pages following the array members act as scratch */
+	recovi = data[disks];
+	recovj = data[disks+1];
+	spare = data[disks+2];
+
+	makedata(disks);
+
+	/* Nuke syndromes */
+	memset(page_address(data[disks-2]), 0xee, PAGE_SIZE);
+	memset(page_address(data[disks-1]), 0xee, PAGE_SIZE);
+
+	/* Generate assumed good syndrome */
+	init_completion(&cmp);
+	init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv);
+	tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit);
+	async_tx_issue_pending(tx);
+
+	if (!wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000))) {
+		pr("error: initial gen_syndrome(%d) timed out\n", disks);
+		return 1;
+	}
+
+	pr("testing the %d-disk case...\n", disks);
+	for (a = 0; a < disks-1; a++) {
+		for (b = a+1; b < disks; b++) {
+			++*tests;
+			failures += test_disks(a, b, disks);
+		}
+	}
+
+	return failures;
+}
+
+
+/* Self-test entry point: allocate NDISKS+3 pages, run the 4-disk, 5-disk
+ * and NDISKS-disk recovery tests, log a summary and release the pages.
+ *
+ * Returns -ENOMEM when a page cannot be allocated and 0 otherwise; test
+ * failures are reported through the log only.
+ */
+static int raid6_test(void)
+{
+	int failures = 0;
+	int ran = 0;
+	int n;
+
+	for (n = 0; n < NDISKS+3; n++) {
+		data[n] = alloc_page(GFP_KERNEL);
+		if (data[n])
+			continue;
+
+		/* drop the pages obtained before the failure */
+		while (n--)
+			put_page(data[n]);
+		return -ENOMEM;
+	}
+
+	/* the 4-disk and 5-disk cases are special for the recovery code */
+	if (NDISKS > 4)
+		failures += test(4, &ran);
+	if (NDISKS > 5)
+		failures += test(5, &ran);
+	failures += test(NDISKS, &ran);
+
+	pr("\n");
+	pr("complete (%d tests, %d failure%s)\n",
+	   ran, failures, failures == 1 ? "" : "s");
+
+	for (n = 0; n < NDISKS+3; n++)
+		put_page(data[n]);
+
+	return 0;
+}
+
+/* Module exit hook. Intentionally empty: raid6_test() releases every page
+ * it allocated before it returns.
+ */
+static void raid6_test_exit(void)
+{
+}
+
+/* when compiled-in wait for drivers to load first (assumes dma drivers
+ * are also compiled-in)
+ */
+late_initcall(raid6_test);
+module_exit(raid6_test_exit);
+MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
+MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index d295bdccc09..9335b87c517 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -115,6 +115,9 @@ static const struct file_operations acpi_button_state_fops = {
.release = single_release,
};
+static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier);
+static struct acpi_device *lid_device;
+
/* --------------------------------------------------------------------------
FS Interface (/proc)
-------------------------------------------------------------------------- */
@@ -231,11 +234,38 @@ static int acpi_button_remove_fs(struct acpi_device *device)
/* --------------------------------------------------------------------------
Driver Interface
-------------------------------------------------------------------------- */
+/* Add @nb to the acpi_lid_notifier chain, which acpi_lid_send_state()
+ * invokes with the evaluated _LID value. Returns whatever
+ * blocking_notifier_chain_register() returns.
+ */
+int acpi_lid_notifier_register(struct notifier_block *nb)
+{
+	int ret = blocking_notifier_chain_register(&acpi_lid_notifier, nb);
+
+	return ret;
+}
+EXPORT_SYMBOL(acpi_lid_notifier_register);
+
+/* Remove @nb from the acpi_lid_notifier chain. Returns whatever
+ * blocking_notifier_chain_unregister() returns.
+ */
+int acpi_lid_notifier_unregister(struct notifier_block *nb)
+{
+	int ret = blocking_notifier_chain_unregister(&acpi_lid_notifier, nb);
+
+	return ret;
+}
+EXPORT_SYMBOL(acpi_lid_notifier_unregister);
+
+/**
+ * acpi_lid_open - query the current state of the lid
+ *
+ * Evaluates _LID on the lid device recorded by acpi_button_add().
+ *
+ * Returns 1 when _LID evaluates to a non-zero value, 0 when it evaluates
+ * to zero, and -ENODEV when no lid device has been recorded or the
+ * evaluation fails.
+ */
+int acpi_lid_open(void)
+{
+	acpi_status status;
+	unsigned long long state;
+
+	/* lid_device is only assigned once a lid button has been added;
+	 * without this check a system with no lid dereferences NULL
+	 */
+	if (!lid_device)
+		return -ENODEV;
+
+	status = acpi_evaluate_integer(lid_device->handle, "_LID", NULL,
+				       &state);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	return !!state;
+}
+EXPORT_SYMBOL(acpi_lid_open);
+
static int acpi_lid_send_state(struct acpi_device *device)
{
struct acpi_button *button = acpi_driver_data(device);
unsigned long long state;
acpi_status status;
+ int ret;
status = acpi_evaluate_integer(device->handle, "_LID", NULL, &state);
if (ACPI_FAILURE(status))
@@ -244,7 +274,12 @@ static int acpi_lid_send_state(struct acpi_device *device)
/* input layer checks if event is redundant */
input_report_switch(button->input, SW_LID, !state);
input_sync(button->input);
- return 0;
+
+ ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device);
+ if (ret == NOTIFY_DONE)
+ ret = blocking_notifier_call_chain(&acpi_lid_notifier, state,
+ device);
+ return ret;
}
static void acpi_button_notify(struct acpi_device *device, u32 event)
@@ -366,8 +401,14 @@ static int acpi_button_add(struct acpi_device *device)
error = input_register_device(input);
if (error)
goto err_remove_fs;
- if (button->type == ACPI_BUTTON_TYPE_LID)
+ if (button->type == ACPI_BUTTON_TYPE_LID) {
acpi_lid_send_state(device);
+ /*
+ * This assumes there's only one lid device, or if there are
+ * more we only care about the last one...
+ */
+ lid_device = device;
+ }
if (device->wakeup.flags.valid) {
/* Button's GPE is run-wake GPE */
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 56071b67bed..5633b86e3ed 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -193,7 +193,7 @@ acpi_status __init acpi_os_initialize(void)
static void bind_to_cpu0(struct work_struct *work)
{
- set_cpus_allowed(current, cpumask_of_cpu(0));
+ set_cpus_allowed_ptr(current, cpumask_of(0));
kfree(work);
}
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 11088cf1031..8ba0ed0b9dd 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -511,7 +511,7 @@ int acpi_processor_preregister_performance(
struct acpi_processor *match_pr;
struct acpi_psd_package *match_pdomain;
- if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))
return -ENOMEM;
mutex_lock(&performance_mutex);
@@ -558,7 +558,6 @@ int acpi_processor_preregister_performance(
* Now that we have _PSD data from all CPUs, lets setup P-state
* domain info.
*/
- cpumask_clear(covered_cpus);
for_each_possible_cpu(i) {
pr = per_cpu(processors, i);
if (!pr)
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index ce7cf3bc510..4c6c14c1e30 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -77,7 +77,7 @@ static int acpi_processor_update_tsd_coord(void)
struct acpi_tsd_package *pdomain, *match_pdomain;
struct acpi_processor_throttling *pthrottling, *match_pthrottling;
- if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))
return -ENOMEM;
/*
@@ -105,7 +105,6 @@ static int acpi_processor_update_tsd_coord(void)
if (retval)
goto err_ret;
- cpumask_clear(covered_cpus);
for_each_possible_cpu(i) {
pr = per_cpu(processors, i);
if (!pr)
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 2de64065aa1..29e66d603d3 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -790,11 +790,15 @@ he_init_group(struct he_dev *he_dev, int group)
he_dev->rbps_base = pci_alloc_consistent(he_dev->pci_dev,
CONFIG_RBPS_SIZE * sizeof(struct he_rbp), &he_dev->rbps_phys);
if (he_dev->rbps_base == NULL) {
- hprintk("failed to alloc rbps\n");
- return -ENOMEM;
+ hprintk("failed to alloc rbps_base\n");
+ goto out_destroy_rbps_pool;
}
memset(he_dev->rbps_base, 0, CONFIG_RBPS_SIZE * sizeof(struct he_rbp));
he_dev->rbps_virt = kmalloc(CONFIG_RBPS_SIZE * sizeof(struct he_virt), GFP_KERNEL);
+ if (he_dev->rbps_virt == NULL) {
+ hprintk("failed to alloc rbps_virt\n");
+ goto out_free_rbps_base;
+ }
for (i = 0; i < CONFIG_RBPS_SIZE; ++i) {
dma_addr_t dma_handle;
@@ -802,7 +806,7 @@ he_init_group(struct he_dev *he_dev, int group)
cpuaddr = pci_pool_alloc(he_dev->rbps_pool, GFP_KERNEL|GFP_DMA, &dma_handle);
if (cpuaddr == NULL)
- return -ENOMEM;
+ goto out_free_rbps_virt;
he_dev->rbps_virt[i].virt = cpuaddr;
he_dev->rbps_base[i].status = RBP_LOANED | RBP_SMALLBUF | (i << RBP_INDEX_OFF);
@@ -827,17 +831,21 @@ he_init_group(struct he_dev *he_dev, int group)
CONFIG_RBPL_BUFSIZE, 8, 0);
if (he_dev->rbpl_pool == NULL) {
hprintk("unable to create rbpl pool\n");
- return -ENOMEM;
+ goto out_free_rbps_virt;
}
he_dev->rbpl_base = pci_alloc_consistent(he_dev->pci_dev,
CONFIG_RBPL_SIZE * sizeof(struct he_rbp), &he_dev->rbpl_phys);
if (he_dev->rbpl_base == NULL) {
- hprintk("failed to alloc rbpl\n");
- return -ENOMEM;
+ hprintk("failed to alloc rbpl_base\n");
+ goto out_destroy_rbpl_pool;
}
memset(he_dev->rbpl_base, 0, CONFIG_RBPL_SIZE * sizeof(struct he_rbp));
he_dev->rbpl_virt = kmalloc(CONFIG_RBPL_SIZE * sizeof(struct he_virt), GFP_KERNEL);
+ if (he_dev->rbpl_virt == NULL) {
+ hprintk("failed to alloc rbpl_virt\n");
+ goto out_free_rbpl_base;
+ }
for (i = 0; i < CONFIG_RBPL_SIZE; ++i) {
dma_addr_t dma_handle;
@@ -845,7 +853,7 @@ he_init_group(struct he_dev *he_dev, int group)
cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL|GFP_DMA, &dma_handle);
if (cpuaddr == NULL)
- return -ENOMEM;
+ goto out_free_rbpl_virt;
he_dev->rbpl_virt[i].virt = cpuaddr;
he_dev->rbpl_base[i].status = RBP_LOANED | (i << RBP_INDEX_OFF);
@@ -870,7 +878,7 @@ he_init_group(struct he_dev *he_dev, int group)
CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq), &he_dev->rbrq_phys);
if (he_dev->rbrq_base == NULL) {
hprintk("failed to allocate rbrq\n");
- return -ENOMEM;
+ goto out_free_rbpl_virt;
}
memset(he_dev->rbrq_base, 0, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq));
@@ -894,7 +902,7 @@ he_init_group(struct he_dev *he_dev, int group)
CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq), &he_dev->tbrq_phys);
if (he_dev->tbrq_base == NULL) {
hprintk("failed to allocate tbrq\n");
- return -ENOMEM;
+ goto out_free_rbpq_base;
}
memset(he_dev->tbrq_base, 0, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq));
@@ -906,6 +914,39 @@ he_init_group(struct he_dev *he_dev, int group)
he_writel(he_dev, CONFIG_TBRQ_THRESH, G0_TBRQ_THRESH + (group * 16));
return 0;
+
+out_free_rbpq_base:
+ pci_free_consistent(he_dev->pci_dev, CONFIG_RBRQ_SIZE *
+ sizeof(struct he_rbrq), he_dev->rbrq_base,
+ he_dev->rbrq_phys);
+ i = CONFIG_RBPL_SIZE;
+out_free_rbpl_virt:
+ while (--i)
+ pci_pool_free(he_dev->rbps_pool, he_dev->rbpl_virt[i].virt,
+ he_dev->rbps_base[i].phys);
+ kfree(he_dev->rbpl_virt);
+
+out_free_rbpl_base:
+ pci_free_consistent(he_dev->pci_dev, CONFIG_RBPL_SIZE *
+ sizeof(struct he_rbp), he_dev->rbpl_base,
+ he_dev->rbpl_phys);
+out_destroy_rbpl_pool:
+ pci_pool_destroy(he_dev->rbpl_pool);
+
+ i = CONFIG_RBPL_SIZE;
+out_free_rbps_virt:
+ while (--i)
+ pci_pool_free(he_dev->rbpl_pool, he_dev->rbps_virt[i].virt,
+ he_dev->rbpl_base[i].phys);
+ kfree(he_dev->rbps_virt);
+
+out_free_rbps_base:
+ pci_free_consistent(he_dev->pci_dev, CONFIG_RBPS_SIZE *
+ sizeof(struct he_rbp), he_dev->rbps_base,
+ he_dev->rbps_phys);
+out_destroy_rbps_pool:
+ pci_pool_destroy(he_dev->rbps_pool);
+ return -ENOMEM;
}
static int __devinit
diff --git a/drivers/atm/solos-attrlist.c b/drivers/atm/solos-attrlist.c
index efa2808dd94..1a9332e4efe 100644
--- a/drivers/atm/solos-attrlist.c
+++ b/drivers/atm/solos-attrlist.c
@@ -25,6 +25,10 @@ SOLOS_ATTR_RO(RSCorrectedErrorsUp)
SOLOS_ATTR_RO(RSUnCorrectedErrorsUp)
SOLOS_ATTR_RO(InterleaveRDn)
SOLOS_ATTR_RO(InterleaveRUp)
+SOLOS_ATTR_RO(BisRDn)
+SOLOS_ATTR_RO(BisRUp)
+SOLOS_ATTR_RO(INPdown)
+SOLOS_ATTR_RO(INPup)
SOLOS_ATTR_RO(ShowtimeStart)
SOLOS_ATTR_RO(ATURVendor)
SOLOS_ATTR_RO(ATUCCountry)
@@ -62,6 +66,13 @@ SOLOS_ATTR_RW(Defaults)
SOLOS_ATTR_RW(LineMode)
SOLOS_ATTR_RW(Profile)
SOLOS_ATTR_RW(DetectNoise)
+SOLOS_ATTR_RW(BisAForceSNRMarginDn)
+SOLOS_ATTR_RW(BisMForceSNRMarginDn)
+SOLOS_ATTR_RW(BisAMaxMargin)
+SOLOS_ATTR_RW(BisMMaxMargin)
+SOLOS_ATTR_RW(AnnexAForceSNRMarginDn)
+SOLOS_ATTR_RW(AnnexAMaxMargin)
+SOLOS_ATTR_RW(AnnexMMaxMargin)
SOLOS_ATTR_RO(SupportedAnnexes)
SOLOS_ATTR_RO(Status)
SOLOS_ATTR_RO(TotalStart)
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 307321b32cb..c5f5186d62a 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -59,21 +59,29 @@
#define RX_DMA_ADDR(port) (0x30 + (4 * (port)))
#define DATA_RAM_SIZE 32768
-#define BUF_SIZE 4096
+#define BUF_SIZE 2048
+#define OLD_BUF_SIZE 4096 /* For FPGA versions <= 2*/
#define FPGA_PAGE 528 /* FPGA flash page size*/
#define SOLOS_PAGE 512 /* Solos flash page size*/
#define FPGA_BLOCK (FPGA_PAGE * 8) /* FPGA flash block size*/
#define SOLOS_BLOCK (SOLOS_PAGE * 8) /* Solos flash block size*/
-#define RX_BUF(card, nr) ((card->buffers) + (nr)*BUF_SIZE*2)
-#define TX_BUF(card, nr) ((card->buffers) + (nr)*BUF_SIZE*2 + BUF_SIZE)
+#define RX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2)
+#define TX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2 + (card->buffer_size))
+#define FLASH_BUF ((card->buffers) + 4*(card->buffer_size)*2)
#define RX_DMA_SIZE 2048
+#define FPGA_VERSION(a,b) (((a) << 8) + (b))
+#define LEGACY_BUFFERS 2
+#define DMA_SUPPORTED 4
+
static int reset = 0;
static int atmdebug = 0;
static int firmware_upgrade = 0;
static int fpga_upgrade = 0;
+static int db_firmware_upgrade = 0;
+static int db_fpga_upgrade = 0;
struct pkt_hdr {
__le16 size;
@@ -116,6 +124,8 @@ struct solos_card {
wait_queue_head_t param_wq;
wait_queue_head_t fw_wq;
int using_dma;
+ int fpga_version;
+ int buffer_size;
};
@@ -136,10 +146,14 @@ MODULE_PARM_DESC(reset, "Reset Solos chips on startup");
MODULE_PARM_DESC(atmdebug, "Print ATM data");
MODULE_PARM_DESC(firmware_upgrade, "Initiate Solos firmware upgrade");
MODULE_PARM_DESC(fpga_upgrade, "Initiate FPGA upgrade");
+MODULE_PARM_DESC(db_firmware_upgrade, "Initiate daughter board Solos firmware upgrade");
+MODULE_PARM_DESC(db_fpga_upgrade, "Initiate daughter board FPGA upgrade");
module_param(reset, int, 0444);
module_param(atmdebug, int, 0644);
module_param(firmware_upgrade, int, 0444);
module_param(fpga_upgrade, int, 0444);
+module_param(db_firmware_upgrade, int, 0444);
+module_param(db_fpga_upgrade, int, 0444);
static void fpga_queue(struct solos_card *card, int port, struct sk_buff *skb,
struct atm_vcc *vcc);
@@ -517,10 +531,32 @@ static int flash_upgrade(struct solos_card *card, int chip)
if (chip == 0) {
fw_name = "solos-FPGA.bin";
blocksize = FPGA_BLOCK;
- } else {
+ }
+
+ if (chip == 1) {
fw_name = "solos-Firmware.bin";
blocksize = SOLOS_BLOCK;
}
+
+ if (chip == 2){
+ if (card->fpga_version > LEGACY_BUFFERS){
+ fw_name = "solos-db-FPGA.bin";
+ blocksize = FPGA_BLOCK;
+ } else {
+ dev_info(&card->dev->dev, "FPGA version doesn't support daughter board upgrades\n");
+ return -EPERM;
+ }
+ }
+
+ if (chip == 3){
+ if (card->fpga_version > LEGACY_BUFFERS){
+ fw_name = "solos-Firmware.bin";
+ blocksize = SOLOS_BLOCK;
+ } else {
+ dev_info(&card->dev->dev, "FPGA version doesn't support daughter board upgrades\n");
+ return -EPERM;
+ }
+ }
if (request_firmware(&fw, fw_name, &card->dev->dev))
return -ENOENT;
@@ -536,8 +572,10 @@ static int flash_upgrade(struct solos_card *card, int chip)
data32 = ioread32(card->config_regs + FPGA_MODE);
/* Set mode to Chip Erase */
- dev_info(&card->dev->dev, "Set FPGA Flash mode to %s Chip Erase\n",
- chip?"Solos":"FPGA");
+ if(chip == 0 || chip == 2)
+ dev_info(&card->dev->dev, "Set FPGA Flash mode to FPGA Chip Erase\n");
+ if(chip == 1 || chip == 3)
+ dev_info(&card->dev->dev, "Set FPGA Flash mode to Solos Chip Erase\n");
iowrite32((chip * 2), card->config_regs + FLASH_MODE);
@@ -557,7 +595,10 @@ static int flash_upgrade(struct solos_card *card, int chip)
/* Copy block to buffer, swapping each 16 bits */
for(i = 0; i < blocksize; i += 4) {
uint32_t word = swahb32p((uint32_t *)(fw->data + offset + i));
- iowrite32(word, RX_BUF(card, 3) + i);
+ if(card->fpga_version > LEGACY_BUFFERS)
+ iowrite32(word, FLASH_BUF + i);
+ else
+ iowrite32(word, RX_BUF(card, 3) + i);
}
/* Specify block number and then trigger flash write */
@@ -630,6 +671,10 @@ void solos_bh(unsigned long card_arg)
memcpy_fromio(header, RX_BUF(card, port), sizeof(*header));
size = le16_to_cpu(header->size);
+ if (size > (card->buffer_size - sizeof(*header))){
+ dev_warn(&card->dev->dev, "Invalid buffer size\n");
+ continue;
+ }
skb = alloc_skb(size + 1, GFP_ATOMIC);
if (!skb) {
@@ -1094,12 +1139,18 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id)
fpga_ver = (data32 & 0x0000FFFF);
major_ver = ((data32 & 0xFF000000) >> 24);
minor_ver = ((data32 & 0x00FF0000) >> 16);
+ card->fpga_version = FPGA_VERSION(major_ver,minor_ver);
+ if (card->fpga_version > LEGACY_BUFFERS)
+ card->buffer_size = BUF_SIZE;
+ else
+ card->buffer_size = OLD_BUF_SIZE;
dev_info(&dev->dev, "Solos FPGA Version %d.%02d svn-%d\n",
major_ver, minor_ver, fpga_ver);
- if (0 && fpga_ver > 27)
+ if (card->fpga_version >= DMA_SUPPORTED){
card->using_dma = 1;
- else {
+ } else {
+ card->using_dma = 0;
/* Set RX empty flag for all ports */
iowrite32(0xF0, card->config_regs + FLAGS_ADDR);
}
@@ -1131,6 +1182,12 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id)
if (firmware_upgrade)
flash_upgrade(card, 1);
+ if (db_fpga_upgrade)
+ flash_upgrade(card, 2);
+
+ if (db_firmware_upgrade)
+ flash_upgrade(card, 3);
+
err = atm_init(card);
if (err)
goto out_free_irq;
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 71d1b9bab70..614da5b8613 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3412,7 +3412,7 @@ static int cdrom_print_info(const char *header, int val, char *info,
return 0;
}
-static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp,
+static int cdrom_sysctl_info(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int pos;
@@ -3489,7 +3489,7 @@ static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp,
goto done;
doit:
mutex_unlock(&cdrom_mutex);
- return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ return proc_dostring(ctl, write, buffer, lenp, ppos);
done:
printk(KERN_INFO "cdrom: info buffer too small\n");
goto doit;
@@ -3525,12 +3525,12 @@ static void cdrom_update_settings(void)
mutex_unlock(&cdrom_mutex);
}
-static int cdrom_sysctl_handler(ctl_table *ctl, int write, struct file * filp,
+static int cdrom_sysctl_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write) {
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 6a06913b01d..08a6f50ae79 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -1087,6 +1087,14 @@ config MMTIMER
The mmtimer device allows direct userspace access to the
Altix system timer.
+config UV_MMTIMER
+ tristate "UV_MMTIMER Memory mapped RTC for SGI UV"
+ depends on X86_UV
+ default m
+ help
+ The uv_mmtimer device allows direct userspace access to the
+ UV system timer.
+
source "drivers/char/tpm/Kconfig"
config TELCLOCK
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 66f779ad4f4..19a79dd79ee 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -58,6 +58,7 @@ obj-$(CONFIG_RAW_DRIVER) += raw.o
obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o
obj-$(CONFIG_MSPEC) += mspec.o
obj-$(CONFIG_MMTIMER) += mmtimer.o
+obj-$(CONFIG_UV_MMTIMER) += uv_mmtimer.o
obj-$(CONFIG_VIOTAPE) += viotape.o
obj-$(CONFIG_HVCS) += hvcs.o
obj-$(CONFIG_IBM_BSR) += bsr.o
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 1540e693d91..4068467ce7b 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -46,6 +46,8 @@
#define PCI_DEVICE_ID_INTEL_Q35_IG 0x29B2
#define PCI_DEVICE_ID_INTEL_Q33_HB 0x29D0
#define PCI_DEVICE_ID_INTEL_Q33_IG 0x29D2
+#define PCI_DEVICE_ID_INTEL_B43_HB 0x2E40
+#define PCI_DEVICE_ID_INTEL_B43_IG 0x2E42
#define PCI_DEVICE_ID_INTEL_GM45_HB 0x2A40
#define PCI_DEVICE_ID_INTEL_GM45_IG 0x2A42
#define PCI_DEVICE_ID_INTEL_IGD_E_HB 0x2E00
@@ -91,6 +93,7 @@
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G45_HB || \
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_GM45_HB || \
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G41_HB || \
+ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_B43_HB || \
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_D_HB || \
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_M_HB || \
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB)
@@ -804,23 +807,39 @@ static void intel_i830_setup_flush(void)
if (!intel_private.i8xx_page)
return;
- /* make page uncached */
- map_page_into_agp(intel_private.i8xx_page);
-
intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page);
if (!intel_private.i8xx_flush_page)
intel_i830_fini_flush();
}
+/* on_each_cpu() callback: runs wbinvd() on the executing CPU; the argument is unused. */
+static void do_wbinvd(void *null)
+{
+ wbinvd();
+}
+
+/* The chipset_flush interface needs to get data that has already been
+ * flushed out of the CPU all the way out to main memory, because the GPU
+ * doesn't snoop those buffers.
+ *
+ * The 8xx series doesn't have the same lovely interface for flushing the
+ * chipset write buffers that the later chips do. According to the 865
+ * specs, it's 64 octwords, or 1KB. So, to get those previous things in
+ * that buffer out, we just fill 1KB and clflush it out, on the assumption
+ * that it'll push whatever was in there out. It appears to work.
+ */
static void intel_i830_chipset_flush(struct agp_bridge_data *bridge)
{
unsigned int *pg = intel_private.i8xx_flush_page;
- int i;
- for (i = 0; i < 256; i += 2)
- *(pg + i) = i;
+ memset(pg, 0, 1024);
- wmb();
+ if (cpu_has_clflush) {
+ clflush_cache_range(pg, 1024);
+ } else {
+ if (on_each_cpu(do_wbinvd, NULL, 1) != 0)
+ printk(KERN_ERR "Timed out waiting for cache flush.\n");
+ }
}
/* The intel i830 automatically initializes the agp aperture during POST.
@@ -1341,6 +1360,7 @@ static void intel_i965_get_gtt_range(int *gtt_offset, int *gtt_size)
case PCI_DEVICE_ID_INTEL_Q45_HB:
case PCI_DEVICE_ID_INTEL_G45_HB:
case PCI_DEVICE_ID_INTEL_G41_HB:
+ case PCI_DEVICE_ID_INTEL_B43_HB:
case PCI_DEVICE_ID_INTEL_IGDNG_D_HB:
case PCI_DEVICE_ID_INTEL_IGDNG_M_HB:
case PCI_DEVICE_ID_INTEL_IGDNG_MA_HB:
@@ -2335,6 +2355,8 @@ static const struct intel_driver_description {
"Q45/Q43", NULL, &intel_i965_driver },
{ PCI_DEVICE_ID_INTEL_G45_HB, PCI_DEVICE_ID_INTEL_G45_IG, 0,
"G45/G43", NULL, &intel_i965_driver },
+ { PCI_DEVICE_ID_INTEL_B43_HB, PCI_DEVICE_ID_INTEL_B43_IG, 0,
+ "B43", NULL, &intel_i965_driver },
{ PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, 0,
"G41", NULL, &intel_i965_driver },
{ PCI_DEVICE_ID_INTEL_IGDNG_D_HB, PCI_DEVICE_ID_INTEL_IGDNG_D_IG, 0,
@@ -2535,6 +2557,7 @@ static struct pci_device_id agp_intel_pci_table[] = {
ID(PCI_DEVICE_ID_INTEL_Q45_HB),
ID(PCI_DEVICE_ID_INTEL_G45_HB),
ID(PCI_DEVICE_ID_INTEL_G41_HB),
+ ID(PCI_DEVICE_ID_INTEL_B43_HB),
ID(PCI_DEVICE_ID_INTEL_IGDNG_D_HB),
ID(PCI_DEVICE_ID_INTEL_IGDNG_M_HB),
ID(PCI_DEVICE_ID_INTEL_IGDNG_MA_HB),
diff --git a/drivers/char/bfin-otp.c b/drivers/char/bfin-otp.c
index 0a01329451e..e3dd24bff51 100644
--- a/drivers/char/bfin-otp.c
+++ b/drivers/char/bfin-otp.c
@@ -1,8 +1,7 @@
/*
* Blackfin On-Chip OTP Memory Interface
- * Supports BF52x/BF54x
*
- * Copyright 2007-2008 Analog Devices Inc.
+ * Copyright 2007-2009 Analog Devices Inc.
*
* Enter bugs at http://blackfin.uclinux.org/
*
@@ -17,8 +16,10 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
+#include <mtd/mtd-abi.h>
#include <asm/blackfin.h>
+#include <asm/bfrom.h>
#include <asm/uaccess.h>
#define stamp(fmt, args...) pr_debug("%s:%i: " fmt "\n", __func__, __LINE__, ## args)
@@ -30,39 +31,6 @@
static DEFINE_MUTEX(bfin_otp_lock);
-/* OTP Boot ROM functions */
-#define _BOOTROM_OTP_COMMAND 0xEF000018
-#define _BOOTROM_OTP_READ 0xEF00001A
-#define _BOOTROM_OTP_WRITE 0xEF00001C
-
-static u32 (* const otp_command)(u32 command, u32 value) = (void *)_BOOTROM_OTP_COMMAND;
-static u32 (* const otp_read)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_READ;
-static u32 (* const otp_write)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_WRITE;
-
-/* otp_command(): defines for "command" */
-#define OTP_INIT 0x00000001
-#define OTP_CLOSE 0x00000002
-
-/* otp_{read,write}(): defines for "flags" */
-#define OTP_LOWER_HALF 0x00000000 /* select upper/lower 64-bit half (bit 0) */
-#define OTP_UPPER_HALF 0x00000001
-#define OTP_NO_ECC 0x00000010 /* do not use ECC */
-#define OTP_LOCK 0x00000020 /* sets page protection bit for page */
-#define OTP_ACCESS_READ 0x00001000
-#define OTP_ACCESS_READWRITE 0x00002000
-
-/* Return values for all functions */
-#define OTP_SUCCESS 0x00000000
-#define OTP_MASTER_ERROR 0x001
-#define OTP_WRITE_ERROR 0x003
-#define OTP_READ_ERROR 0x005
-#define OTP_ACC_VIO_ERROR 0x009
-#define OTP_DATA_MULT_ERROR 0x011
-#define OTP_ECC_MULT_ERROR 0x021
-#define OTP_PREV_WR_ERROR 0x041
-#define OTP_DATA_SB_WARN 0x100
-#define OTP_ECC_SB_WARN 0x200
-
/**
* bfin_otp_read - Read OTP pages
*
@@ -86,9 +54,11 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
page = *pos / (sizeof(u64) * 2);
while (bytes_done < count) {
flags = (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF);
- stamp("processing page %i (%s)", page, (flags == OTP_UPPER_HALF ? "upper" : "lower"));
- ret = otp_read(page, flags, &content);
+ stamp("processing page %i (0x%x:%s)", page, flags,
+ (flags & OTP_UPPER_HALF ? "upper" : "lower"));
+ ret = bfrom_OtpRead(page, flags, &content);
if (ret & OTP_MASTER_ERROR) {
+ stamp("error from otp: 0x%x", ret);
bytes_done = -EIO;
break;
}
@@ -96,7 +66,7 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
bytes_done = -EFAULT;
break;
}
- if (flags == OTP_UPPER_HALF)
+ if (flags & OTP_UPPER_HALF)
++page;
bytes_done += sizeof(content);
*pos += sizeof(content);
@@ -108,14 +78,53 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
}
#ifdef CONFIG_BFIN_OTP_WRITE_ENABLE
+static bool allow_writes;
+
+/**
+ * bfin_otp_init_timing - program the OTP timing value
+ *
+ * Must be called before any write operation; the formulas come from the HRM.
+ * Returns the timing word handed to OTP_INIT, or 0 when bfrom_OtpCommand()
+ * returns non-zero.
+ */
+static u32 bfin_otp_init_timing(void)
+{
+ u32 timing;
+
+ /* three terms OR-ed together: unshifted, shifted to bit 8, shifted to bit 15 */
+ timing = get_sclk() / 1000000;
+ timing |= (2 * get_sclk() / 10000000) << 8;
+ timing |= (0x1401) << 15;
+
+ return bfrom_OtpCommand(OTP_INIT, timing) ? 0 : timing;
+}
+
+/**
+ * bfin_otp_deinit_timing - set timings to only allow reads
+ * @timing: timing word obtained from bfin_otp_init_timing()
+ *
+ * Should be called after all writes are done.
+ */
+static void bfin_otp_deinit_timing(u32 timing)
+{
+ /* mask bits [31:15] so that any attempts to write fail */
+ bfrom_OtpCommand(OTP_CLOSE, 0);
+ /* (1U << 15) - 1 keeps bits [14:0] without left-shifting a negative int */
+ bfrom_OtpCommand(OTP_INIT, timing & ((1U << 15) - 1));
+ bfrom_OtpCommand(OTP_CLOSE, 0);
+}
+
/**
- * bfin_otp_write - Write OTP pages
+ * bfin_otp_write - write OTP pages
*
* All writes must be in half page chunks (half page == 64 bits).
*/
static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t count, loff_t *pos)
{
- stampit();
+ ssize_t bytes_done;
+ u32 timing, page, base_flags, flags, ret;
+ u64 content;
+
+ if (!allow_writes)
+ return -EACCES;
if (count % sizeof(u64))
return -EMSGSIZE;
@@ -123,20 +132,96 @@ static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t
if (mutex_lock_interruptible(&bfin_otp_lock))
return -ERESTARTSYS;
- /* need otp_init() documentation before this can be implemented */
+ stampit();
+
+ timing = bfin_otp_init_timing();
+ if (timing == 0) {
+ mutex_unlock(&bfin_otp_lock);
+ return -EIO;
+ }
+
+ base_flags = OTP_CHECK_FOR_PREV_WRITE;
+
+ bytes_done = 0;
+ page = *pos / (sizeof(u64) * 2);
+ while (bytes_done < count) {
+ /* '|' binds tighter than '?:', so the half selection must be parenthesized or base_flags becomes part of the condition */
+ flags = base_flags | ((*pos % (sizeof(u64) * 2)) ? OTP_UPPER_HALF : OTP_LOWER_HALF);
+ stamp("processing page %i (0x%x:%s) from %p", page, flags,
+ (flags & OTP_UPPER_HALF ? "upper" : "lower"), buff + bytes_done);
+ if (copy_from_user(&content, buff + bytes_done, sizeof(content))) {
+ bytes_done = -EFAULT;
+ break;
+ }
+ ret = bfrom_OtpWrite(page, flags, &content);
+ if (ret & OTP_MASTER_ERROR) {
+ stamp("error from otp: 0x%x", ret);
+ bytes_done = -EIO;
+ break;
+ }
+ if (flags & OTP_UPPER_HALF)
+ ++page;
+ bytes_done += sizeof(content);
+ *pos += sizeof(content);
+ }
+
+ bfin_otp_deinit_timing(timing);
mutex_unlock(&bfin_otp_lock);
+ return bytes_done;
+}
+
+/**
+ * bfin_otp_ioctl - OTP control commands
+ *
+ * OTPLOCK calls bfrom_OtpWrite() with OTP_LOCK for page @arg and returns 0
+ * unless the result has OTP_MASTER_ERROR set (-EIO); it is refused with
+ * -EACCES while allow_writes is false. MEMLOCK clears allow_writes and
+ * MEMUNLOCK sets it. Any other @cmd returns -EINVAL.
+ */
+static long bfin_otp_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
+{
+ stampit();
+
+ switch (cmd) {
+ case OTPLOCK: {
+ u32 timing;
+ int ret = -EIO;
+
+ if (!allow_writes)
+ return -EACCES;
+
+ if (mutex_lock_interruptible(&bfin_otp_lock))
+ return -ERESTARTSYS;
+
+ /* a zero timing means OTP_INIT failed; ret then stays -EIO */
+ timing = bfin_otp_init_timing();
+ if (timing) {
+ u32 otp_result = bfrom_OtpWrite(arg, OTP_LOCK, NULL);
+ stamp("locking page %lu resulted in 0x%x", arg, otp_result);
+ if (!(otp_result & OTP_MASTER_ERROR))
+ ret = 0;
+
+ bfin_otp_deinit_timing(timing);
+ }
+
+ mutex_unlock(&bfin_otp_lock);
+
+ return ret;
+ }
+
+ case MEMLOCK:
+ allow_writes = false;
+ return 0;
+
+ case MEMUNLOCK:
+ allow_writes = true;
+ return 0;
+ }
+
return -EINVAL;
}
#else
# define bfin_otp_write NULL
+# define bfin_otp_ioctl NULL
#endif
static struct file_operations bfin_otp_fops = {
- .owner = THIS_MODULE,
- .read = bfin_otp_read,
- .write = bfin_otp_write,
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = bfin_otp_ioctl,
+ .read = bfin_otp_read,
+ .write = bfin_otp_write,
};
static struct miscdevice bfin_otp_misc_device = {
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 4a9f3492b92..70a770ac013 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -166,9 +166,8 @@ static irqreturn_t hpet_interrupt(int irq, void *data)
unsigned long m, t;
t = devp->hd_ireqfreq;
- m = read_counter(&devp->hd_hpet->hpet_mc);
- write_counter(t + m + devp->hd_hpets->hp_delta,
- &devp->hd_timer->hpet_compare);
+ m = read_counter(&devp->hd_timer->hpet_compare);
+ write_counter(t + m, &devp->hd_timer->hpet_compare);
}
if (devp->hd_flags & HPET_SHARED_IRQ)
@@ -504,21 +503,25 @@ static int hpet_ioctl_ieon(struct hpet_dev *devp)
g = v | Tn_32MODE_CNF_MASK | Tn_INT_ENB_CNF_MASK;
if (devp->hd_flags & HPET_PERIODIC) {
- write_counter(t, &timer->hpet_compare);
g |= Tn_TYPE_CNF_MASK;
- v |= Tn_TYPE_CNF_MASK;
- writeq(v, &timer->hpet_config);
- v |= Tn_VAL_SET_CNF_MASK;
+ v |= Tn_TYPE_CNF_MASK | Tn_VAL_SET_CNF_MASK;
writeq(v, &timer->hpet_config);
local_irq_save(flags);
- /* NOTE: what we modify here is a hidden accumulator
+ /*
+ * NOTE: First we modify the hidden accumulator
* register supported by periodic-capable comparators.
* We never want to modify the (single) counter; that
- * would affect all the comparators.
+ * would affect all the comparators. The value written
+ * is the counter value when the first interrupt is due.
*/
m = read_counter(&hpet->hpet_mc);
write_counter(t + m + hpetp->hp_delta, &timer->hpet_compare);
+ /*
+ * Then we modify the comparator, indicating the period
+ * for subsequent interrupt.
+ */
+ write_counter(t, &timer->hpet_compare);
} else {
local_irq_save(flags);
m = read_counter(&hpet->hpet_mc);
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 0aede1d6a9e..6c8b65d069e 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -690,7 +690,7 @@ static ssize_t read_zero(struct file * file, char __user * buf,
if (chunk > PAGE_SIZE)
chunk = PAGE_SIZE; /* Just for latency reasons */
- unwritten = clear_user(buf, chunk);
+ unwritten = __clear_user(buf, chunk);
written += chunk - unwritten;
if (unwritten)
break;
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index 94ad2c3bfc4..a4ec50c9507 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -281,12 +281,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
case IOCTL_MW_REGISTER_IPC: {
unsigned int ipcnum = (unsigned int) ioarg;
- PRINTK_3(TRACE_MWAVE,
- "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
- " ipcnum %x entry usIntCount %x\n",
- ipcnum,
- pDrvData->IPCs[ipcnum].usIntCount);
-
if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) {
PRINTK_ERROR(KERN_ERR_MWAVE
"mwavedd::mwave_ioctl:"
@@ -295,6 +289,12 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
ipcnum);
return -EINVAL;
}
+ PRINTK_3(TRACE_MWAVE,
+ "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
+ " ipcnum %x entry usIntCount %x\n",
+ ipcnum,
+ pDrvData->IPCs[ipcnum].usIntCount);
+
lock_kernel();
pDrvData->IPCs[ipcnum].bIsHere = FALSE;
pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
@@ -310,11 +310,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
case IOCTL_MW_GET_IPC: {
unsigned int ipcnum = (unsigned int) ioarg;
- PRINTK_3(TRACE_MWAVE,
- "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC"
- " ipcnum %x, usIntCount %x\n",
- ipcnum,
- pDrvData->IPCs[ipcnum].usIntCount);
if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) {
PRINTK_ERROR(KERN_ERR_MWAVE
"mwavedd::mwave_ioctl:"
@@ -322,6 +317,11 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
" Invalid ipcnum %x\n", ipcnum);
return -EINVAL;
}
+ PRINTK_3(TRACE_MWAVE,
+ "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC"
+ " ipcnum %x, usIntCount %x\n",
+ ipcnum,
+ pDrvData->IPCs[ipcnum].usIntCount);
lock_kernel();
if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
diff --git a/drivers/char/random.c b/drivers/char/random.c
index d8a9255e1a3..04b505e5a5e 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1231,7 +1231,7 @@ static char sysctl_bootid[16];
* as an ASCII string in the standard UUID format. If accesses via the
* sysctl system call, it is returned as 16 bytes of binary data.
*/
-static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
+static int proc_do_uuid(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
ctl_table fake_table;
@@ -1254,7 +1254,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
fake_table.data = buf;
fake_table.maxlen = sizeof(buf);
- return proc_dostring(&fake_table, write, filp, buffer, lenp, ppos);
+ return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}
static int uuid_strategy(ctl_table *table,
diff --git a/drivers/char/rio/rioctrl.c b/drivers/char/rio/rioctrl.c
index eecee0f576d..74339559f0b 100644
--- a/drivers/char/rio/rioctrl.c
+++ b/drivers/char/rio/rioctrl.c
@@ -873,7 +873,7 @@ int riocontrol(struct rio_info *p, dev_t dev, int cmd, unsigned long arg, int su
/*
** It is important that the product code is an unsigned object!
*/
- if (DownLoad.ProductCode > MAX_PRODUCT) {
+ if (DownLoad.ProductCode >= MAX_PRODUCT) {
rio_dprintk(RIO_DEBUG_CTRL, "RIO_DOWNLOAD: Bad product code %d passed\n", DownLoad.ProductCode);
p->RIOError.Error = NO_SUCH_PRODUCT;
return -ENXIO;
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index 32b957efa42..45d58002b06 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -742,7 +742,7 @@ EXPORT_SYMBOL_GPL(tpm_pcr_read);
* the module usage count.
*/
#define TPM_ORD_PCR_EXTEND cpu_to_be32(20)
-#define EXTEND_PCR_SIZE 34
+#define EXTEND_PCR_RESULT_SIZE 34
static struct tpm_input_header pcrextend_header = {
.tag = TPM_TAG_RQU_COMMAND,
.length = cpu_to_be32(34),
@@ -760,10 +760,9 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash)
return -ENODEV;
cmd.header.in = pcrextend_header;
- BUG_ON(be32_to_cpu(cmd.header.in.length) > EXTEND_PCR_SIZE);
cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
- rc = transmit_cmd(chip, &cmd, cmd.header.in.length,
+ rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
"attempting extend a PCR value");
module_put(chip->dev->driver->owner);
diff --git a/drivers/char/uv_mmtimer.c b/drivers/char/uv_mmtimer.c
new file mode 100644
index 00000000000..867b67be9f0
--- /dev/null
+++ b/drivers/char/uv_mmtimer.c
@@ -0,0 +1,216 @@
+/*
+ * Timer device implementation for SGI UV platform.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2009 Silicon Graphics, Inc. All rights reserved.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mmtimer.h>
+#include <linux/miscdevice.h>
+#include <linux/posix-timers.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/smp_lock.h>
+
+#include <asm/genapic.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+
+MODULE_AUTHOR("Dimitri Sivanich <sivanich@sgi.com>");
+MODULE_DESCRIPTION("SGI UV Memory Mapped RTC Timer");
+MODULE_LICENSE("GPL");
+
+/* name of the device, usually in /dev */
+#define UV_MMTIMER_NAME "mmtimer"
+#define UV_MMTIMER_DESC "SGI UV Memory Mapped RTC Timer"
+#define UV_MMTIMER_VERSION "1.0"
+
+static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg);
+static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma);
+
+/*
+ * Period in femtoseconds (10^-15 s)
+ */
+static unsigned long uv_mmtimer_femtoperiod;
+
+static const struct file_operations uv_mmtimer_fops = {
+ .owner = THIS_MODULE,
+ .mmap = uv_mmtimer_mmap,
+ .unlocked_ioctl = uv_mmtimer_ioctl,
+};
+
+/**
+ * uv_mmtimer_ioctl - ioctl interface for the UV mmtimer device
+ * @file: file structure for the device
+ * @cmd: command to execute
+ * @arg: optional argument to command
+ *
+ * Executes the command specified by @cmd. Returns a non-negative value for
+ * success, < 0 for failure (-EFAULT when a copy to @arg fails, -ENOTTY for
+ * an unrecognized @cmd).
+ *
+ * Valid commands:
+ *
+ * %MMTIMER_GETOFFSET - Returns the offset (relative to the start of the
+ * page where the registers are mapped) of the counter, divided by 8, or
+ * -ENOSYS when PAGE_SIZE is larger than 64KB.
+ *
+ * %MMTIMER_GETRES - Copies the resolution of the clock in femto (10^-15)
+ * seconds to the address specified by @arg
+ *
+ * %MMTIMER_GETFREQ - Copies the frequency of the clock in Hz to the address
+ * specified by @arg
+ *
+ * %MMTIMER_GETBITS - Returns the number of bits in the clock's counter
+ *
+ * %MMTIMER_MMAPAVAIL - Returns 1 if registers can be mmap'd into userspace
+ * (PAGE_SIZE no larger than 64KB), 0 otherwise
+ *
+ * %MMTIMER_GETCOUNTER - Gets the current value in the counter and places it
+ * in the address specified by @arg.
+ */
+static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret = 0;
+
+ switch (cmd) {
+ case MMTIMER_GETOFFSET: /* offset of the counter */
+ /*
+ * UV RTC register is on its own page
+ */
+ if (PAGE_SIZE <= (1 << 16))
+ ret = ((UV_LOCAL_MMR_BASE | UVH_RTC) & (PAGE_SIZE-1))
+ / 8;
+ else
+ ret = -ENOSYS;
+ break;
+
+ case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */
+ if (copy_to_user((unsigned long __user *)arg,
+ &uv_mmtimer_femtoperiod, sizeof(unsigned long)))
+ ret = -EFAULT;
+ break;
+
+ case MMTIMER_GETFREQ: /* frequency in Hz */
+ if (copy_to_user((unsigned long __user *)arg,
+ &sn_rtc_cycles_per_second,
+ sizeof(unsigned long)))
+ ret = -EFAULT;
+ break;
+
+ case MMTIMER_GETBITS: /* number of bits in the clock */
+ ret = hweight64(UVH_RTC_REAL_TIME_CLOCK_MASK);
+ break;
+
+ case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */
+ ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0;
+ break;
+
+ case MMTIMER_GETCOUNTER:
+ /* the source of the copy is the RTC register address itself */
+ if (copy_to_user((unsigned long __user *)arg,
+ (unsigned long *)uv_local_mmr_address(UVH_RTC),
+ sizeof(unsigned long)))
+ ret = -EFAULT;
+ break;
+ default:
+ ret = -ENOTTY;
+ break;
+ }
+ return ret;
+}
+
+/**
+ * uv_mmtimer_mmap - map the page holding the RTC register into userspace
+ * @file: file structure for the device
+ * @vma: VMA to map the registers into
+ *
+ * Accepts only a single-page, non-writable mapping and hands it to
+ * remap_pfn_range() with a non-cached page protection. Returns 0 on
+ * success, -EINVAL for a wrong-sized VMA, -EPERM for a writable one,
+ * -ENOSYS when PAGE_SIZE exceeds 64KB and -EAGAIN if the remap fails.
+ */
+static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long mmr_addr;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (vma->vm_flags & VM_WRITE)
+ return -EPERM;
+
+ if (PAGE_SIZE > (1 << 16))
+ return -ENOSYS;
+
+ /* page-align the register address and keep only its low 60 bits */
+ mmr_addr = UV_LOCAL_MMR_BASE | UVH_RTC;
+ mmr_addr &= ~(PAGE_SIZE - 1) & 0xfffffffffffffffUL;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (!remap_pfn_range(vma, vma->vm_start, mmr_addr >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot))
+ return 0;
+
+ printk(KERN_ERR "remap_pfn_range failed in uv_mmtimer_mmap\n");
+ return -EAGAIN;
+}
+
+/* Misc device descriptor registered by uv_mmtimer_init(). */
+static struct miscdevice uv_mmtimer_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = UV_MMTIMER_NAME,
+ .fops = &uv_mmtimer_fops,
+};
+
+
+/**
+ * uv_mmtimer_init - device initialization routine
+ *
+ * Checks that this is a UV system with a plausible RTC frequency, derives
+ * the clock period in femtoseconds and registers the misc device.
+ *
+ * Returns 0 on success, -ENODEV when the hardware is unsupported or the
+ * clock frequency is unusable, or the error code from misc_register().
+ */
+static int __init uv_mmtimer_init(void)
+{
+ int err;
+
+ if (!is_uv_system()) {
+ printk(KERN_ERR "%s: Hardware unsupported\n", UV_MMTIMER_NAME);
+ return -ENODEV;
+ }
+
+ /*
+ * Sanity check the cycles/sec variable
+ */
+ if (sn_rtc_cycles_per_second < 100000) {
+ printk(KERN_ERR "%s: unable to determine clock frequency\n",
+ UV_MMTIMER_NAME);
+ return -ENODEV;
+ }
+
+ /* 10^15 fs per second divided by the frequency, rounded to nearest */
+ uv_mmtimer_femtoperiod = (1000000000000000UL +
+ sn_rtc_cycles_per_second / 2) /
+ sn_rtc_cycles_per_second;
+
+ err = misc_register(&uv_mmtimer_miscdev);
+ if (err) {
+ printk(KERN_ERR "%s: failed to register device\n",
+ UV_MMTIMER_NAME);
+ return err;
+ }
+
+ printk(KERN_INFO "%s: v%s, %lu MHz\n", UV_MMTIMER_DESC,
+ UV_MMTIMER_VERSION,
+ sn_rtc_cycles_per_second / 1000000UL);
+
+ return 0;
+}
+
+module_init(uv_mmtimer_init);
diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index 25b743abfb5..52e6bb70a49 100644
--- a/drivers/dca/dca-core.c
+++ b/drivers/dca/dca-core.c
@@ -28,7 +28,7 @@
#include <linux/device.h>
#include <linux/dca.h>
-#define DCA_VERSION "1.8"
+#define DCA_VERSION "1.12.1"
MODULE_VERSION(DCA_VERSION);
MODULE_LICENSE("GPL");
@@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation");
static DEFINE_SPINLOCK(dca_lock);
-static LIST_HEAD(dca_providers);
+static LIST_HEAD(dca_domains);
-static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+static struct pci_bus *dca_pci_rc_from_dev(struct device *dev)
{
- struct dca_provider *dca, *ret = NULL;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pci_bus *bus = pdev->bus;
- list_for_each_entry(dca, &dca_providers, node) {
- if ((!dev) || (dca->ops->dev_managed(dca, dev))) {
- ret = dca;
- break;
- }
+ while (bus->parent)
+ bus = bus->parent;
+
+ return bus;
+}
+
+/* Allocate a zeroed dca_domain for bus @rc; returns NULL if kzalloc() fails. */
+static struct dca_domain *dca_allocate_domain(struct pci_bus *rc)
+{
+ struct dca_domain *d = kzalloc(sizeof(*d), GFP_NOWAIT);
+
+ if (d) {
+ INIT_LIST_HEAD(&d->dca_providers);
+ d->pci_rc = rc;
+ }
+
+ return d;
+}
+
+/* Unlink @domain from the list it sits on via ->node, then free it. */
+static void dca_free_domain(struct dca_domain *domain)
+{
+ list_del(&domain->node);
+ kfree(domain);
+}
+
+/* Search dca_domains for the entry whose pci_rc equals @rc; NULL if none. */
+static struct dca_domain *dca_find_domain(struct pci_bus *rc)
+{
+ struct dca_domain *cur;
+ struct dca_domain *found = NULL;
+
+ list_for_each_entry(cur, &dca_domains, node) {
+ if (cur->pci_rc == rc) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+/*
+ * Return the domain for the bus dca_pci_rc_from_dev() reports for @dev,
+ * allocating one and adding it to dca_domains when none exists yet.
+ * Returns NULL if the allocation fails.
+ */
+static struct dca_domain *dca_get_domain(struct device *dev)
+{
+ struct pci_bus *root = dca_pci_rc_from_dev(dev);
+ struct dca_domain *domain = dca_find_domain(root);
+
+ if (domain)
+ return domain;
+
+ domain = dca_allocate_domain(root);
+ if (!domain)
+ return NULL;
+
+ list_add(&domain->node, &dca_domains);
+ return domain;
+}
+
+static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+{
+ struct dca_provider *dca;
+ struct pci_bus *rc;
+ struct dca_domain *domain;
+
+ if (dev) {
+ rc = dca_pci_rc_from_dev(dev);
+ domain = dca_find_domain(rc);
+ if (!domain)
+ return NULL;
+ } else {
+ if (!list_empty(&dca_domains))
+ domain = list_first_entry(&dca_domains,
+ struct dca_domain,
+ node);
+ else
+ return NULL;
}
- return ret;
+ list_for_each_entry(dca, &domain->dca_providers, node)
+ if ((!dev) || (dca->ops->dev_managed(dca, dev)))
+ return dca;
+
+ return NULL;
}
/**
@@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev)
struct dca_provider *dca;
int err, slot = -ENODEV;
unsigned long flags;
+ struct pci_bus *pci_rc;
+ struct dca_domain *domain;
if (!dev)
return -EFAULT;
@@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev)
return -EEXIST;
}
- list_for_each_entry(dca, &dca_providers, node) {
+ pci_rc = dca_pci_rc_from_dev(dev);
+ domain = dca_find_domain(pci_rc);
+ if (!domain) {
+ spin_unlock_irqrestore(&dca_lock, flags);
+ return -ENODEV;
+ }
+
+ list_for_each_entry(dca, &domain->dca_providers, node) {
slot = dca->ops->add_requester(dca, dev);
if (slot >= 0)
break;
@@ -222,13 +303,19 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev)
{
int err;
unsigned long flags;
+ struct dca_domain *domain;
err = dca_sysfs_add_provider(dca, dev);
if (err)
return err;
spin_lock_irqsave(&dca_lock, flags);
- list_add(&dca->node, &dca_providers);
+ domain = dca_get_domain(dev);
+ if (!domain) {
+ spin_unlock_irqrestore(&dca_lock, flags);
+ /* undo dca_sysfs_add_provider() so the sysfs entry is not leaked */
+ dca_sysfs_remove_provider(dca);
+ return -ENODEV;
+ }
+ list_add(&dca->node, &domain->dca_providers);
spin_unlock_irqrestore(&dca_lock, flags);
blocking_notifier_call_chain(&dca_provider_chain,
@@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider);
* unregister_dca_provider - remove a dca provider
* @dca - struct created by alloc_dca_provider()
+ * @dev - device used to look up the dca domain the provider was added to
*/
-void unregister_dca_provider(struct dca_provider *dca)
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev)
{
unsigned long flags;
+ struct pci_bus *pci_rc;
+ struct dca_domain *domain;
blocking_notifier_call_chain(&dca_provider_chain,
DCA_PROVIDER_REMOVE, NULL);
spin_lock_irqsave(&dca_lock, flags);
+
list_del(&dca->node);
+
+ pci_rc = dca_pci_rc_from_dev(dev);
+ domain = dca_find_domain(pci_rc);
+ /* dca_find_domain() returns NULL when no domain matches; don't deref it */
+ if (domain && list_empty(&domain->dca_providers))
+ dca_free_domain(domain);
+
spin_unlock_irqrestore(&dca_lock, flags);
dca_sysfs_remove_provider(dca);
@@ -276,7 +372,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify);
static int __init dca_init(void)
{
- printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION);
+ pr_info("dca service started, version %s\n", DCA_VERSION);
return dca_sysfs_init();
}
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 81e1020fb51..5903a88351b 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -17,11 +17,15 @@ if DMADEVICES
comment "DMA Devices"
+config ASYNC_TX_DISABLE_CHANNEL_SWITCH
+ bool
+
config INTEL_IOATDMA
tristate "Intel I/OAT DMA support"
depends on PCI && X86
select DMA_ENGINE
select DCA
+ select ASYNC_TX_DISABLE_CHANNEL_SWITCH
help
Enable support for the Intel(R) I/OAT DMA engine present
in recent Intel Xeon chipsets.
@@ -97,6 +101,14 @@ config TXX9_DMAC
Support the TXx9 SoC internal DMA controller. This can be
integrated in chips such as the Toshiba TX4927/38/39.
+config SH_DMAE
+ tristate "Renesas SuperH DMAC support"
+ depends on SUPERH && SH_DMA
+ depends on !SH_DMA_API
+ select DMA_ENGINE
+ help
+ Enable support for the Renesas SuperH DMA controllers.
+
config DMA_ENGINE
bool
@@ -116,7 +128,7 @@ config NET_DMA
config ASYNC_TX_DMA
bool "Async_tx: Offload support for the async_tx api"
- depends on DMA_ENGINE && !HIGHMEM64G
+ depends on DMA_ENGINE
help
This allows the async_tx api to take advantage of offload engines for
memcpy, memset, xor, and raid6 p+q operations. If your platform has
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 40e1e008357..eca71ba78ae 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,8 +1,7 @@
obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
obj-$(CONFIG_NET_DMA) += iovlock.o
obj-$(CONFIG_DMATEST) += dmatest.o
-obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
-ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
+obj-$(CONFIG_INTEL_IOATDMA) += ioat/
obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
obj-$(CONFIG_FSL_DMA) += fsldma.o
obj-$(CONFIG_MV_XOR) += mv_xor.o
@@ -10,3 +9,4 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o
obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
obj-$(CONFIG_MX3_IPU) += ipu/
obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
+obj-$(CONFIG_SH_DMAE) += shdma.o
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index c8522e6f1ad..7585c4164bd 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -87,6 +87,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys);
if (desc) {
memset(desc, 0, sizeof(struct at_desc));
+ INIT_LIST_HEAD(&desc->tx_list);
dma_async_tx_descriptor_init(&desc->txd, chan);
/* txd.flags will be overwritten in prep functions */
desc->txd.flags = DMA_CTRL_ACK;
@@ -150,11 +151,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
struct at_desc *child;
spin_lock_bh(&atchan->lock);
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
dev_vdbg(chan2dev(&atchan->chan_common),
"moving child desc %p to freelist\n",
child);
- list_splice_init(&desc->txd.tx_list, &atchan->free_list);
+ list_splice_init(&desc->tx_list, &atchan->free_list);
dev_vdbg(chan2dev(&atchan->chan_common),
"moving desc %p to freelist\n", desc);
list_add(&desc->desc_node, &atchan->free_list);
@@ -247,30 +248,33 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
param = txd->callback_param;
/* move children to free_list */
- list_splice_init(&txd->tx_list, &atchan->free_list);
+ list_splice_init(&desc->tx_list, &atchan->free_list);
/* move myself to free_list */
list_move(&desc->desc_node, &atchan->free_list);
/* unmap dma addresses */
- if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
- dma_unmap_single(chan2parent(&atchan->chan_common),
- desc->lli.daddr,
- desc->len, DMA_FROM_DEVICE);
- else
- dma_unmap_page(chan2parent(&atchan->chan_common),
- desc->lli.daddr,
- desc->len, DMA_FROM_DEVICE);
- }
- if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
- dma_unmap_single(chan2parent(&atchan->chan_common),
- desc->lli.saddr,
- desc->len, DMA_TO_DEVICE);
- else
- dma_unmap_page(chan2parent(&atchan->chan_common),
- desc->lli.saddr,
- desc->len, DMA_TO_DEVICE);
+ if (!atchan->chan_common.private) {
+ struct device *parent = chan2parent(&atchan->chan_common);
+ if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+ dma_unmap_single(parent,
+ desc->lli.daddr,
+ desc->len, DMA_FROM_DEVICE);
+ else
+ dma_unmap_page(parent,
+ desc->lli.daddr,
+ desc->len, DMA_FROM_DEVICE);
+ }
+ if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+ dma_unmap_single(parent,
+ desc->lli.saddr,
+ desc->len, DMA_TO_DEVICE);
+ else
+ dma_unmap_page(parent,
+ desc->lli.saddr,
+ desc->len, DMA_TO_DEVICE);
+ }
}
/*
@@ -334,7 +338,7 @@ static void atc_cleanup_descriptors(struct at_dma_chan *atchan)
/* This one is currently in progress */
return;
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
if (!(child->lli.ctrla & ATC_DONE))
/* Currently in progress */
return;
@@ -407,7 +411,7 @@ static void atc_handle_error(struct at_dma_chan *atchan)
dev_crit(chan2dev(&atchan->chan_common),
" cookie: %d\n", bad_desc->txd.cookie);
atc_dump_lli(atchan, &bad_desc->lli);
- list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
atc_dump_lli(atchan, &child->lli);
/* Pretend the descriptor completed successfully */
@@ -587,7 +591,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
prev->lli.dscr = desc->txd.phys;
/* insert the link descriptor to the LD ring */
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
}
@@ -646,8 +650,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
reg_width = atslave->reg_width;
- sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
-
ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN;
@@ -687,7 +689,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
prev->lli.dscr = desc->txd.phys;
/* insert the link descriptor to the LD ring */
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
total_len += len;
@@ -729,7 +731,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
prev->lli.dscr = desc->txd.phys;
/* insert the link descriptor to the LD ring */
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
total_len += len;
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
index 4c972afc49e..495457e3dc4 100644
--- a/drivers/dma/at_hdmac_regs.h
+++ b/drivers/dma/at_hdmac_regs.h
@@ -165,6 +165,7 @@ struct at_desc {
struct at_lli lli;
/* THEN values for driver housekeeping */
+ struct list_head tx_list;
struct dma_async_tx_descriptor txd;
struct list_head desc_node;
size_t len;
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 5a87384ea4f..bd0b248de2c 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -608,6 +608,40 @@ void dmaengine_put(void)
}
EXPORT_SYMBOL(dmaengine_put);
+/*
+ * device_has_all_tx_types - check @device against the enabled async_tx ops.
+ *
+ * Returns false as soon as a capability required by one of the configured
+ * operation types below is missing from @device->cap_mask, true otherwise.
+ */
+static bool device_has_all_tx_types(struct dma_device *device)
+{
+ /* A device that satisfies this test has channels that will never cause
+ * an async_tx channel switch event as all possible operation types can
+ * be handled.
+ */
+ #ifdef CONFIG_ASYNC_TX_DMA
+ if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+ return false;
+ #endif
+
+ #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
+ if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
+ return false;
+ #endif
+
+ #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
+ if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
+ return false;
+ #endif
+
+ #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
+ if (!dma_has_cap(DMA_XOR, device->cap_mask))
+ return false;
+ #endif
+
+ #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
+ if (!dma_has_cap(DMA_PQ, device->cap_mask))
+ return false;
+ #endif
+
+ return true;
+}
+
static int get_dma_id(struct dma_device *device)
{
int rc;
@@ -644,8 +678,12 @@ int dma_async_device_register(struct dma_device *device)
!device->device_prep_dma_memcpy);
BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
!device->device_prep_dma_xor);
- BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
- !device->device_prep_dma_zero_sum);
+ BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
+ !device->device_prep_dma_xor_val);
+ BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
+ !device->device_prep_dma_pq);
+ BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
+ !device->device_prep_dma_pq_val);
BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
!device->device_prep_dma_memset);
BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
@@ -661,6 +699,12 @@ int dma_async_device_register(struct dma_device *device)
BUG_ON(!device->device_issue_pending);
BUG_ON(!device->dev);
+ /* note: this only matters in the
+ * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
+ */
+ if (device_has_all_tx_types(device))
+ dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
+
idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
if (!idr_ref)
return -ENOMEM;
@@ -933,55 +977,29 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
{
tx->chan = chan;
spin_lock_init(&tx->lock);
- INIT_LIST_HEAD(&tx->tx_list);
}
EXPORT_SYMBOL(dma_async_tx_descriptor_init);
/* dma_wait_for_async_tx - spin wait for a transaction to complete
* @tx: in-flight transaction to wait on
- *
- * This routine assumes that tx was obtained from a call to async_memcpy,
- * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
- * and submitted). Walking the parent chain is only meant to cover for DMA
- * drivers that do not implement the DMA_INTERRUPT capability and may race with
- * the driver's descriptor cleanup routine.
+ *
+ * Busy-waits while @tx->cookie is -EBUSY (descriptor not yet submitted) and
+ * gives up with DMA_ERROR once 5000 ms have elapsed. After a cookie has been
+ * assigned, the result of dma_sync_wait() on it is returned. A NULL @tx
+ * yields DMA_SUCCESS.
*/
enum dma_status
dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{
- enum dma_status status;
- struct dma_async_tx_descriptor *iter;
- struct dma_async_tx_descriptor *parent;
+ unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
if (!tx)
return DMA_SUCCESS;
- WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for"
- " %s\n", __func__, dma_chan_name(tx->chan));
-
- /* poll through the dependency chain, return when tx is complete */
- do {
- iter = tx;
-
- /* find the root of the unsubmitted dependency chain */
- do {
- parent = iter->parent;
- if (!parent)
- break;
- else
- iter = parent;
- } while (parent);
-
- /* there is a small window for ->parent == NULL and
- * ->cookie == -EBUSY
- */
- while (iter->cookie == -EBUSY)
- cpu_relax();
-
- status = dma_sync_wait(iter->chan, iter->cookie);
- } while (status == DMA_IN_PROGRESS || (iter != tx));
-
- return status;
+ while (tx->cookie == -EBUSY) {
+ if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+ pr_err("%s timeout waiting for descriptor submission\n",
+ __func__);
+ return DMA_ERROR;
+ }
+ cpu_relax();
+ }
+ return dma_sync_wait(tx->chan, tx->cookie);
}
EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index d93017fc787..a32a4cf7b1e 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -48,6 +48,11 @@ module_param(xor_sources, uint, S_IRUGO);
MODULE_PARM_DESC(xor_sources,
"Number of xor source buffers (default: 3)");
+static unsigned int pq_sources = 3;
+module_param(pq_sources, uint, S_IRUGO);
+MODULE_PARM_DESC(pq_sources,
+ "Number of p+q source buffers (default: 3)");
+
/*
* Initialization patterns. All bytes in the source buffer has bit 7
* set, all bytes in the destination buffer has bit 7 cleared.
@@ -232,6 +237,7 @@ static int dmatest_func(void *data)
dma_cookie_t cookie;
enum dma_status status;
enum dma_ctrl_flags flags;
+ u8 pq_coefs[pq_sources];
int ret;
int src_cnt;
int dst_cnt;
@@ -248,6 +254,11 @@ static int dmatest_func(void *data)
else if (thread->type == DMA_XOR) {
src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
dst_cnt = 1;
+ } else if (thread->type == DMA_PQ) {
+ src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
+ dst_cnt = 2;
+ for (i = 0; i < pq_sources; i++)
+ pq_coefs[i] = 1;
} else
goto err_srcs;
@@ -283,6 +294,7 @@ static int dmatest_func(void *data)
dma_addr_t dma_dsts[dst_cnt];
struct completion cmp;
unsigned long tmo = msecs_to_jiffies(3000);
+ u8 align = 0;
total_tests++;
@@ -290,6 +302,18 @@ static int dmatest_func(void *data)
src_off = dmatest_random() % (test_buf_size - len + 1);
dst_off = dmatest_random() % (test_buf_size - len + 1);
+ /* honor alignment restrictions */
+ if (thread->type == DMA_MEMCPY)
+ align = dev->copy_align;
+ else if (thread->type == DMA_XOR)
+ align = dev->xor_align;
+ else if (thread->type == DMA_PQ)
+ align = dev->pq_align;
+
+ len = (len >> align) << align;
+ src_off = (src_off >> align) << align;
+ dst_off = (dst_off >> align) << align;
+
dmatest_init_srcs(thread->srcs, src_off, len);
dmatest_init_dsts(thread->dsts, dst_off, len);
@@ -306,6 +330,7 @@ static int dmatest_func(void *data)
DMA_BIDIRECTIONAL);
}
+
if (thread->type == DMA_MEMCPY)
tx = dev->device_prep_dma_memcpy(chan,
dma_dsts[0] + dst_off,
@@ -316,6 +341,15 @@ static int dmatest_func(void *data)
dma_dsts[0] + dst_off,
dma_srcs, xor_sources,
len, flags);
+ else if (thread->type == DMA_PQ) {
+ dma_addr_t dma_pq[dst_cnt];
+
+ for (i = 0; i < dst_cnt; i++)
+ dma_pq[i] = dma_dsts[i] + dst_off;
+ tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+ pq_sources, pq_coefs,
+ len, flags);
+ }
if (!tx) {
for (i = 0; i < src_cnt; i++)
@@ -459,6 +493,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
op = "copy";
else if (type == DMA_XOR)
op = "xor";
+ else if (type == DMA_PQ)
+ op = "pq";
else
return -EINVAL;
@@ -514,6 +550,10 @@ static int dmatest_add_channel(struct dma_chan *chan)
cnt = dmatest_add_threads(dtc, DMA_XOR);
thread_count += cnt > 0 ? cnt : 0;
}
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ cnt = dmatest_add_threads(dtc, DMA_PQ);
+ /* add the number of threads started; "cnt > 0 ?: 0" only ever adds 0 or 1 */
+ thread_count += cnt > 0 ? cnt : 0;
+ }
pr_info("dmatest: Started %u threads using %s\n",
thread_count, dma_chan_name(chan));
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 933c143b6a7..2eea823516a 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -116,7 +116,7 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
{
struct dw_desc *child;
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
dma_sync_single_for_cpu(chan2parent(&dwc->chan),
child->txd.phys, sizeof(child->lli),
DMA_TO_DEVICE);
@@ -137,11 +137,11 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
dwc_sync_desc_for_cpu(dwc, desc);
spin_lock_bh(&dwc->lock);
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
dev_vdbg(chan2dev(&dwc->chan),
"moving child desc %p to freelist\n",
child);
- list_splice_init(&desc->txd.tx_list, &dwc->free_list);
+ list_splice_init(&desc->tx_list, &dwc->free_list);
dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
list_add(&desc->desc_node, &dwc->free_list);
spin_unlock_bh(&dwc->lock);
@@ -209,19 +209,28 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
param = txd->callback_param;
dwc_sync_desc_for_cpu(dwc, desc);
- list_splice_init(&txd->tx_list, &dwc->free_list);
+ list_splice_init(&desc->tx_list, &dwc->free_list);
list_move(&desc->desc_node, &dwc->free_list);
- /*
- * We use dma_unmap_page() regardless of how the buffers were
- * mapped before they were submitted...
- */
- if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP))
- dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar,
- desc->len, DMA_FROM_DEVICE);
- if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
- dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar,
- desc->len, DMA_TO_DEVICE);
+ if (!dwc->chan.private) {
+ struct device *parent = chan2parent(&dwc->chan);
+ if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+ dma_unmap_single(parent, desc->lli.dar,
+ desc->len, DMA_FROM_DEVICE);
+ else
+ dma_unmap_page(parent, desc->lli.dar,
+ desc->len, DMA_FROM_DEVICE);
+ }
+ if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+ dma_unmap_single(parent, desc->lli.sar,
+ desc->len, DMA_TO_DEVICE);
+ else
+ dma_unmap_page(parent, desc->lli.sar,
+ desc->len, DMA_TO_DEVICE);
+ }
+ }
/*
* The API requires that no submissions are done from a
@@ -289,7 +298,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
/* This one is currently in progress */
return;
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
if (child->lli.llp == llp)
/* Currently in progress */
return;
@@ -356,7 +365,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
" cookie: %d\n", bad_desc->txd.cookie);
dwc_dump_lli(dwc, &bad_desc->lli);
- list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
dwc_dump_lli(dwc, &child->lli);
/* Pretend the descriptor completed successfully */
@@ -608,7 +617,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
prev->txd.phys, sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
}
@@ -658,8 +667,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
reg_width = dws->reg_width;
prev = first = NULL;
- sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
-
switch (direction) {
case DMA_TO_DEVICE:
ctllo = (DWC_DEFAULT_CTLLO
@@ -700,7 +707,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
total_len += len;
@@ -746,7 +753,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
total_len += len;
@@ -902,6 +909,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
break;
}
+ INIT_LIST_HEAD(&desc->tx_list);
dma_async_tx_descriptor_init(&desc->txd, chan);
desc->txd.tx_submit = dwc_tx_submit;
desc->txd.flags = DMA_CTRL_ACK;
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 13a58076703..d9a939f67f4 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -217,6 +217,7 @@ struct dw_desc {
/* THEN values for driver housekeeping */
struct list_head desc_node;
+ struct list_head tx_list;
struct dma_async_tx_descriptor txd;
size_t len;
};
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index ef87a898414..296f9e747fa 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -34,6 +34,7 @@
#include <linux/dmapool.h>
#include <linux/of_platform.h>
+#include <asm/fsldma.h>
#include "fsldma.h"
static void dma_init(struct fsl_dma_chan *fsl_chan)
@@ -280,28 +281,40 @@ static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size)
}
/**
- * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * fsl_chan_set_request_count - Set DMA Request Count for external control
* @fsl_chan : Freescale DMA channel
- * @size : Pause control size, 0 for disable external pause control.
- * The maximum is 1024.
+ * @size : Number of bytes to transfer in a single request
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA request count is how many bytes are allowed to transfer before
+ * pausing the channel, after which a new assertion of DREQ# resumes channel
+ * operation.
*
- * The Freescale DMA channel can be controlled by the external
- * signal DREQ#. The pause control size is how many bytes are allowed
- * to transfer before pausing the channel, after which a new assertion
- * of DREQ# resumes channel operation.
+ * A size of 0 disables external pause control. The maximum size is 1024.
*/
-static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size)
+static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size)
{
- if (size > 1024)
- return;
+ BUG_ON(size > 1024);
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+ | ((__ilog2(size) << 24) & 0x0f000000),
+ 32);
+}
- if (size) {
- DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
- DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
- | ((__ilog2(size) << 24) & 0x0f000000),
- 32);
+/**
+ * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * @fsl_chan : Freescale DMA channel
+ * @enable : 0 is disabled, 1 is enabled.
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA Request Count feature should be used in addition to this feature
+ * to set the number of bytes to transfer before pausing the channel.
+ */
+static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable)
+{
+ if (enable)
fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
- } else
+ else
fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
}
@@ -326,7 +339,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
- struct fsl_desc_sw *desc;
+ struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
+ struct fsl_desc_sw *child;
unsigned long flags;
dma_cookie_t cookie;
@@ -334,7 +348,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
spin_lock_irqsave(&fsl_chan->desc_lock, flags);
cookie = fsl_chan->common.cookie;
- list_for_each_entry(desc, &tx->tx_list, node) {
+ list_for_each_entry(child, &desc->tx_list, node) {
cookie++;
if (cookie < 0)
cookie = 1;
@@ -343,8 +357,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
}
fsl_chan->common.cookie = cookie;
- append_ld_queue(fsl_chan, tx_to_fsl_desc(tx));
- list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev);
+ append_ld_queue(fsl_chan, desc);
+ list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev);
spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
@@ -366,6 +380,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc);
if (desc_sw) {
memset(desc_sw, 0, sizeof(struct fsl_desc_sw));
+ INIT_LIST_HEAD(&desc_sw->tx_list);
dma_async_tx_descriptor_init(&desc_sw->async_tx,
&fsl_chan->common);
desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
@@ -455,7 +470,7 @@ fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags)
new->async_tx.flags = flags;
/* Insert the link descriptor to the LD ring */
- list_add_tail(&new->node, &new->async_tx.tx_list);
+ list_add_tail(&new->node, &new->tx_list);
/* Set End-of-link to the last link descriptor of new list*/
set_ld_eol(fsl_chan, new);
@@ -513,7 +528,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
dma_dest += copy;
/* Insert the link descriptor to the LD ring */
- list_add_tail(&new->node, &first->async_tx.tx_list);
+ list_add_tail(&new->node, &first->tx_list);
} while (len);
new->async_tx.flags = flags; /* client is in control of this ack */
@@ -528,7 +543,7 @@ fail:
if (!first)
return NULL;
- list = &first->async_tx.tx_list;
+ list = &first->tx_list;
list_for_each_entry_safe_reverse(new, prev, list, node) {
list_del(&new->node);
dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
@@ -538,6 +553,229 @@ fail:
}
/**
+ * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @chan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @sgl
+ * @direction: DMA direction
+ * @flags: DMAEngine flags
+ *
+ * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the
+ * DMA_SLAVE API, this gets the device-specific information from the
+ * chan->private variable.
+ */
+static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_data_direction direction, unsigned long flags)
+{
+ struct fsl_dma_chan *fsl_chan;
+ struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+ struct fsl_dma_slave *slave;
+ struct list_head *tx_list;
+ size_t copy;
+
+ int i;
+ struct scatterlist *sg;
+ size_t sg_used;
+ size_t hw_used;
+ struct fsl_dma_hw_addr *hw;
+ dma_addr_t dma_dst, dma_src;
+
+ if (!chan)
+ return NULL;
+
+ if (!chan->private)
+ return NULL;
+
+ fsl_chan = to_fsl_chan(chan);
+ slave = chan->private;
+
+ if (list_empty(&slave->addresses))
+ return NULL;
+
+ hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry);
+ hw_used = 0;
+
+ /*
+ * Build the hardware transaction to copy from the scatterlist to
+ * the hardware, or from the hardware to the scatterlist
+ *
+ * If you are copying from the hardware to the scatterlist and it
+ * takes two hardware entries to fill an entire page, then both
+ * hardware entries will be coalesced into the same page
+ *
+ * If you are copying from the scatterlist to the hardware and a
+ * single page can fill two hardware entries, then the data will
+ * be read out of the page into the first hardware entry, and so on
+ */
+ for_each_sg(sgl, sg, sg_len, i) {
+ sg_used = 0;
+
+ /* Loop until the entire scatterlist entry is used */
+ while (sg_used < sg_dma_len(sg)) {
+
+ /*
+ * If we've used up the current hardware address/length
+ * pair, we need to load a new one
+ *
+ * This is done in a while loop so that descriptors with
+ * length == 0 will be skipped
+ */
+ while (hw_used >= hw->length) {
+
+ /*
+ * If the current hardware entry is the last
+ * entry in the list, we're finished
+ */
+ if (list_is_last(&hw->entry, &slave->addresses))
+ goto finished;
+
+ /* Get the next hardware address/length pair */
+ hw = list_entry(hw->entry.next,
+ struct fsl_dma_hw_addr, entry);
+ hw_used = 0;
+ }
+
+ /* Allocate the link descriptor from DMA pool */
+ new = fsl_dma_alloc_descriptor(fsl_chan);
+ if (!new) {
+ dev_err(fsl_chan->dev, "No free memory for "
+ "link descriptor\n");
+ goto fail;
+ }
+#ifdef FSL_DMA_LD_DEBUG
+ dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
+#endif
+
+ /*
+ * Calculate the maximum number of bytes to transfer,
+ * making sure it is less than the DMA controller limit
+ */
+ copy = min_t(size_t, sg_dma_len(sg) - sg_used,
+ hw->length - hw_used);
+ copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT);
+
+ /*
+ * DMA_FROM_DEVICE
+ * from the hardware to the scatterlist
+ *
+ * DMA_TO_DEVICE
+ * from the scatterlist to the hardware
+ */
+ if (direction == DMA_FROM_DEVICE) {
+ dma_src = hw->address + hw_used;
+ dma_dst = sg_dma_address(sg) + sg_used;
+ } else {
+ dma_src = sg_dma_address(sg) + sg_used;
+ dma_dst = hw->address + hw_used;
+ }
+
+ /* Fill in the descriptor */
+ set_desc_cnt(fsl_chan, &new->hw, copy);
+ set_desc_src(fsl_chan, &new->hw, dma_src);
+ set_desc_dest(fsl_chan, &new->hw, dma_dst);
+
+ /*
+ * If this is not the first descriptor, chain the
+ * current descriptor after the previous descriptor
+ */
+ if (!first) {
+ first = new;
+ } else {
+ set_desc_next(fsl_chan, &prev->hw,
+ new->async_tx.phys);
+ }
+
+ new->async_tx.cookie = 0;
+ async_tx_ack(&new->async_tx);
+
+ prev = new;
+ sg_used += copy;
+ hw_used += copy;
+
+ /* Insert the link descriptor into the LD ring */
+ list_add_tail(&new->node, &first->tx_list);
+ }
+ }
+
+finished:
+
+ /* All of the hardware address/length pairs had length == 0 */
+ if (!first || !new)
+ return NULL;
+
+ new->async_tx.flags = flags;
+ new->async_tx.cookie = -EBUSY;
+
+ /* Set End-of-link to the last link descriptor of new list */
+ set_ld_eol(fsl_chan, new);
+
+ /* Enable extra controller features */
+ if (fsl_chan->set_src_loop_size)
+ fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size);
+
+ if (fsl_chan->set_dest_loop_size)
+ fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size);
+
+ if (fsl_chan->toggle_ext_start)
+ fsl_chan->toggle_ext_start(fsl_chan, slave->external_start);
+
+ if (fsl_chan->toggle_ext_pause)
+ fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause);
+
+ if (fsl_chan->set_request_count)
+ fsl_chan->set_request_count(fsl_chan, slave->request_count);
+
+ return &first->async_tx;
+
+fail:
+ /* If first was not set, then we failed to allocate the very first
+ * descriptor, and we're done */
+ if (!first)
+ return NULL;
+
+ /*
+ * First is set, so all of the descriptors we allocated have been added
+ * to first->tx_list, INCLUDING "first" itself. Therefore we
+ * must traverse the list backwards freeing each descriptor in turn
+ *
+ * We're re-using variables for the loop, oh well
+ */
+ tx_list = &first->tx_list;
+ list_for_each_entry_safe_reverse(new, prev, tx_list, node) {
+ list_del_init(&new->node);
+ dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
+ }
+
+ return NULL;
+}
+
+/**
+ * fsl_dma_device_terminate_all - halt the channel and drop queued descriptors
+ * @chan: DMA channel
+ *
+ * Halts the DMA engine with dma_halt(), then, under desc_lock, unlinks every
+ * descriptor on the channel's ld_queue and returns it to the descriptor pool.
+ * A NULL @chan is ignored.
+ */
+static void fsl_dma_device_terminate_all(struct dma_chan *chan)
+{
+ struct fsl_dma_chan *fsl_chan;
+ struct fsl_desc_sw *desc, *tmp;
+ unsigned long flags;
+
+ if (!chan)
+ return;
+
+ fsl_chan = to_fsl_chan(chan);
+
+ /* Halt the DMA engine */
+ dma_halt(fsl_chan);
+
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+ /* Remove and free all of the descriptors in the LD queue */
+ list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) {
+ list_del(&desc->node);
+ dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
+ }
+
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+}
+
+/**
* fsl_dma_update_completed_cookie - Update the completed cookie.
* @fsl_chan : Freescale DMA channel
*/
@@ -883,6 +1121,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start;
new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size;
new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size;
+ new_fsl_chan->set_request_count = fsl_chan_set_request_count;
}
spin_lock_init(&new_fsl_chan->desc_lock);
@@ -962,12 +1201,15 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+ dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
fdev->common.device_is_tx_complete = fsl_dma_is_complete;
fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
+ fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
+ fdev->common.device_terminate_all = fsl_dma_device_terminate_all;
fdev->common.dev = &dev->dev;
fdev->irq = irq_of_parse_and_map(dev->node, 0);
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index dc7f2686579..0df14cbb8ca 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -90,6 +90,7 @@ struct fsl_dma_ld_hw {
struct fsl_desc_sw {
struct fsl_dma_ld_hw hw;
struct list_head node;
+ struct list_head tx_list;
struct dma_async_tx_descriptor async_tx;
struct list_head *ld;
void *priv;
@@ -143,10 +144,11 @@ struct fsl_dma_chan {
struct tasklet_struct tasklet;
u32 feature;
- void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size);
+ void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable);
void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable);
void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
+ void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size);
};
#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common)
diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c
deleted file mode 100644
index 2225bb6ba3d..00000000000
--- a/drivers/dma/ioat.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Intel I/OAT DMA Linux driver
- * Copyright(c) 2007 - 2009 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- */
-
-/*
- * This driver supports an Intel I/OAT DMA engine, which does asynchronous
- * copy operations.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/dca.h>
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
-#include "ioatdma_hw.h"
-
-MODULE_VERSION(IOAT_DMA_VERSION);
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Intel Corporation");
-
-static struct pci_device_id ioat_pci_tbl[] = {
- /* I/OAT v1 platforms */
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
- { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
-
- /* I/OAT v2 platforms */
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
-
- /* I/OAT v3 platforms */
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
- { 0, }
-};
-
-struct ioat_device {
- struct pci_dev *pdev;
- void __iomem *iobase;
- struct ioatdma_device *dma;
- struct dca_provider *dca;
-};
-
-static int __devinit ioat_probe(struct pci_dev *pdev,
- const struct pci_device_id *id);
-static void __devexit ioat_remove(struct pci_dev *pdev);
-
-static int ioat_dca_enabled = 1;
-module_param(ioat_dca_enabled, int, 0644);
-MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
-
-static struct pci_driver ioat_pci_driver = {
- .name = "ioatdma",
- .id_table = ioat_pci_tbl,
- .probe = ioat_probe,
- .remove = __devexit_p(ioat_remove),
-};
-
-static int __devinit ioat_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
-{
- void __iomem *iobase;
- struct ioat_device *device;
- unsigned long mmio_start, mmio_len;
- int err;
-
- err = pci_enable_device(pdev);
- if (err)
- goto err_enable_device;
-
- err = pci_request_regions(pdev, ioat_pci_driver.name);
- if (err)
- goto err_request_regions;
-
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err)
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err)
- goto err_set_dma_mask;
-
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err)
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err)
- goto err_set_dma_mask;
-
- mmio_start = pci_resource_start(pdev, 0);
- mmio_len = pci_resource_len(pdev, 0);
- iobase = ioremap(mmio_start, mmio_len);
- if (!iobase) {
- err = -ENOMEM;
- goto err_ioremap;
- }
-
- device = kzalloc(sizeof(*device), GFP_KERNEL);
- if (!device) {
- err = -ENOMEM;
- goto err_kzalloc;
- }
- device->pdev = pdev;
- pci_set_drvdata(pdev, device);
- device->iobase = iobase;
-
- pci_set_master(pdev);
-
- switch (readb(iobase + IOAT_VER_OFFSET)) {
- case IOAT_VER_1_2:
- device->dma = ioat_dma_probe(pdev, iobase);
- if (device->dma && ioat_dca_enabled)
- device->dca = ioat_dca_init(pdev, iobase);
- break;
- case IOAT_VER_2_0:
- device->dma = ioat_dma_probe(pdev, iobase);
- if (device->dma && ioat_dca_enabled)
- device->dca = ioat2_dca_init(pdev, iobase);
- break;
- case IOAT_VER_3_0:
- device->dma = ioat_dma_probe(pdev, iobase);
- if (device->dma && ioat_dca_enabled)
- device->dca = ioat3_dca_init(pdev, iobase);
- break;
- default:
- err = -ENODEV;
- break;
- }
- if (!device->dma)
- err = -ENODEV;
-
- if (err)
- goto err_version;
-
- return 0;
-
-err_version:
- kfree(device);
-err_kzalloc:
- iounmap(iobase);
-err_ioremap:
-err_set_dma_mask:
- pci_release_regions(pdev);
- pci_disable_device(pdev);
-err_request_regions:
-err_enable_device:
- return err;
-}
-
-static void __devexit ioat_remove(struct pci_dev *pdev)
-{
- struct ioat_device *device = pci_get_drvdata(pdev);
-
- dev_err(&pdev->dev, "Removing dma and dca services\n");
- if (device->dca) {
- unregister_dca_provider(device->dca);
- free_dca_provider(device->dca);
- device->dca = NULL;
- }
-
- if (device->dma) {
- ioat_dma_remove(device->dma);
- device->dma = NULL;
- }
-
- kfree(device);
-}
-
-static int __init ioat_init_module(void)
-{
- return pci_register_driver(&ioat_pci_driver);
-}
-module_init(ioat_init_module);
-
-static void __exit ioat_exit_module(void)
-{
- pci_unregister_driver(&ioat_pci_driver);
-}
-module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile
new file mode 100644
index 00000000000..8997d3fb905
--- /dev/null
+++ b/drivers/dma/ioat/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o
diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat/dca.c
index c012a1e1504..69d02615c4d 100644
--- a/drivers/dma/ioat_dca.c
+++ b/drivers/dma/ioat/dca.c
@@ -33,8 +33,8 @@
#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
#endif
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
+#include "dma.h"
+#include "registers.h"
/*
* Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
@@ -242,7 +242,8 @@ static struct dca_ops ioat_dca_ops = {
};
-struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+struct dca_provider * __devinit
+ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
{
struct dca_provider *dca;
struct ioat_dca_priv *ioatdca;
@@ -407,7 +408,8 @@ static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
return slots;
}
-struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+struct dca_provider * __devinit
+ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
{
struct dca_provider *dca;
struct ioat_dca_priv *ioatdca;
@@ -602,7 +604,8 @@ static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
return slots;
}
-struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+struct dca_provider * __devinit
+ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
{
struct dca_provider *dca;
struct ioat_dca_priv *ioatdca;
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
new file mode 100644
index 00000000000..c524d36d3c2
--- /dev/null
+++ b/drivers/dma/ioat/dma.c
@@ -0,0 +1,1238 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+int ioat_pending_level = 4;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+ "high-water mark for pushing ioat descriptors (default: 4)");
+
+/* internal functions */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat);
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+ struct ioatdma_device *instance = data;
+ struct ioat_chan_common *chan;
+ unsigned long attnstatus;
+ int bit;
+ u8 intrctrl;
+
+ intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+ if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+ return IRQ_NONE;
+
+ if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+ return IRQ_NONE;
+ }
+
+ attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+ for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
+ chan = ioat_chan_by_index(instance, bit);
+ tasklet_schedule(&chan->cleanup_task);
+ }
+
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+ return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+ struct ioat_chan_common *chan = data;
+
+ tasklet_schedule(&chan->cleanup_task);
+
+ return IRQ_HANDLED;
+}
+
+static void ioat1_cleanup_tasklet(unsigned long data);
+
+/* common channel initialization */
+void ioat_init_channel(struct ioatdma_device *device,
+ struct ioat_chan_common *chan, int idx,
+ void (*timer_fn)(unsigned long),
+ void (*tasklet)(unsigned long),
+ unsigned long ioat)
+{
+ struct dma_device *dma = &device->common;
+
+ chan->device = device;
+ chan->reg_base = device->reg_base + (0x80 * (idx + 1));
+ spin_lock_init(&chan->cleanup_lock);
+ chan->common.device = dma;
+ list_add_tail(&chan->common.device_node, &dma->channels);
+ device->idx[idx] = chan;
+ init_timer(&chan->timer);
+ chan->timer.function = timer_fn;
+ chan->timer.data = ioat;
+ tasklet_init(&chan->cleanup_task, tasklet, ioat);
+ tasklet_disable(&chan->cleanup_task);
+}
+
+static void ioat1_timer_event(unsigned long data);
+
+/**
+ * ioat1_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+static int ioat1_enumerate_channels(struct ioatdma_device *device)
+{
+ u8 xfercap_scale;
+ u32 xfercap;
+ int i;
+ struct ioat_dma_chan *ioat;
+ struct device *dev = &device->pdev->dev;
+ struct dma_device *dma = &device->common;
+
+ INIT_LIST_HEAD(&dma->channels);
+ dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+ dma->chancnt &= 0x1f; /* bits [4:0] valid */
+ if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+ dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+ dma->chancnt, ARRAY_SIZE(device->idx));
+ dma->chancnt = ARRAY_SIZE(device->idx);
+ }
+ xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+ xfercap_scale &= 0x1f; /* bits [4:0] valid */
+ xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
+ dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);
+
+#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
+ if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
+ dma->chancnt--;
+#endif
+ for (i = 0; i < dma->chancnt; i++) {
+ ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+ if (!ioat)
+ break;
+
+ ioat_init_channel(device, &ioat->base, i,
+ ioat1_timer_event,
+ ioat1_cleanup_tasklet,
+ (unsigned long) ioat);
+ ioat->xfercap = xfercap;
+ spin_lock_init(&ioat->desc_lock);
+ INIT_LIST_HEAD(&ioat->free_desc);
+ INIT_LIST_HEAD(&ioat->used_desc);
+ }
+ dma->chancnt = i;
+ return i;
+}
+
+/**
+ * __ioat1_dma_memcpy_issue_pending - push potentially unrecognized appended
+ * descriptors to hw
+ * @ioat: IOAT DMA channel handle
+ */
+static inline void
+__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
+{
+ void __iomem *reg_base = ioat->base.reg_base;
+
+ dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
+ __func__, ioat->pending);
+ ioat->pending = 0;
+ writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
+}
+
+static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(chan);
+
+ if (ioat->pending > 0) {
+ spin_lock_bh(&ioat->desc_lock);
+ __ioat1_dma_memcpy_issue_pending(ioat);
+ spin_unlock_bh(&ioat->desc_lock);
+ }
+}
+
+/**
+ * ioat1_reset_channel - restart a channel
+ * @ioat: IOAT DMA channel handle
+ */
+static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ void __iomem *reg_base = chan->reg_base;
+ u32 chansts, chanerr;
+
+ dev_warn(to_dev(chan), "reset\n");
+ chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
+ chansts = *chan->completion & IOAT_CHANSTS_STATUS;
+ if (chanerr) {
+ dev_err(to_dev(chan),
+ "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
+ chan_num(chan), chansts, chanerr);
+ writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
+ }
+
+ /*
+ * whack it upside the head with a reset
+ * and wait for things to settle out.
+ * force the pending count to a really big negative
+ * to make sure no one forces an issue_pending
+ * while we're waiting.
+ */
+
+ ioat->pending = INT_MIN;
+ writeb(IOAT_CHANCMD_RESET,
+ reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+ set_bit(IOAT_RESET_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + RESET_DELAY);
+}
+
+static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct dma_chan *c = tx->chan;
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_desc_sw *first;
+ struct ioat_desc_sw *chain_tail;
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&ioat->desc_lock);
+ /* cookie incr and addition to used_list must be atomic */
+ cookie = c->cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ c->cookie = cookie;
+ tx->cookie = cookie;
+ dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
+
+ /* write address into NextDescriptor field of last desc in chain */
+ first = to_ioat_desc(desc->tx_list.next);
+ chain_tail = to_ioat_desc(ioat->used_desc.prev);
+ /* make descriptor updates globally visible before chaining */
+ wmb();
+ chain_tail->hw->next = first->txd.phys;
+ list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
+ dump_desc_dbg(ioat, chain_tail);
+ dump_desc_dbg(ioat, first);
+
+ if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ ioat->active += desc->hw->tx_cnt;
+ ioat->pending += desc->hw->tx_cnt;
+ if (ioat->pending >= ioat_pending_level)
+ __ioat1_dma_memcpy_issue_pending(ioat);
+ spin_unlock_bh(&ioat->desc_lock);
+
+ return cookie;
+}
+
+/**
+ * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
+ * @ioat: the channel supplying the memory pool for the descriptors
+ * @flags: allocation flags
+ */
+static struct ioat_desc_sw *
+ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
+{
+ struct ioat_dma_descriptor *desc;
+ struct ioat_desc_sw *desc_sw;
+ struct ioatdma_device *ioatdma_device;
+ dma_addr_t phys;
+
+ ioatdma_device = ioat->base.device;
+ desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
+ if (unlikely(!desc))
+ return NULL;
+
+ desc_sw = kzalloc(sizeof(*desc_sw), flags);
+ if (unlikely(!desc_sw)) {
+ pci_pool_free(ioatdma_device->dma_pool, desc, phys);
+ return NULL;
+ }
+
+ memset(desc, 0, sizeof(*desc));
+
+ INIT_LIST_HEAD(&desc_sw->tx_list);
+ dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
+ desc_sw->txd.tx_submit = ioat1_tx_submit;
+ desc_sw->hw = desc;
+ desc_sw->txd.phys = phys;
+ set_desc_id(desc_sw, -1);
+
+ return desc_sw;
+}
+
+static int ioat_initial_desc_count = 256;
+module_param(ioat_initial_desc_count, int, 0644);
+MODULE_PARM_DESC(ioat_initial_desc_count,
+ "ioat1: initial descriptors per channel (default: 256)");
+/**
+ * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
+ * @c: the channel to be filled out
+ */
+static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_desc_sw *desc;
+ u32 chanerr;
+ int i;
+ LIST_HEAD(tmp_list);
+
+ /* have we already been set up? */
+ if (!list_empty(&ioat->free_desc))
+ return ioat->desccount;
+
+ /* Set up register to interrupt and write completion status on error */
+ writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ if (chanerr) {
+ dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
+ writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+ }
+
+ /* Allocate descriptors; a partial allocation is kept and reported */
+ for (i = 0; i < ioat_initial_desc_count; i++) {
+ desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
+ if (!desc) {
+ dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
+ break;
+ }
+ set_desc_id(desc, i);
+ list_add_tail(&desc->node, &tmp_list);
+ }
+ spin_lock_bh(&ioat->desc_lock);
+ ioat->desccount = i;
+ list_splice(&tmp_list, &ioat->free_desc);
+ spin_unlock_bh(&ioat->desc_lock);
+
+ /* allocate a completion writeback area (NOTE: result is not NULL-checked) */
+ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+ chan->completion = pci_pool_alloc(chan->device->completion_pool,
+ GFP_KERNEL, &chan->completion_dma);
+ memset(chan->completion, 0, sizeof(*chan->completion));
+ writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+ chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+ writel(((u64) chan->completion_dma) >> 32,
+ chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+ tasklet_enable(&chan->cleanup_task);
+ ioat1_dma_start_null_desc(ioat); /* give chain to dma device */
+ dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+ __func__, ioat->desccount);
+ return ioat->desccount;
+}
+
+/**
+ * ioat1_dma_free_chan_resources - release all the descriptors
+ * @c: the channel to be cleaned
+ */
+static void ioat1_dma_free_chan_resources(struct dma_chan *c)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioatdma_device *ioatdma_device = chan->device;
+ struct ioat_desc_sw *desc, *_desc;
+ int in_use_descs = 0;
+
+ /* Before freeing channel resources first check
+ * if they have been previously allocated for this channel.
+ */
+ if (ioat->desccount == 0)
+ return;
+
+ tasklet_disable(&chan->cleanup_task);
+ del_timer_sync(&chan->timer);
+ ioat1_cleanup(ioat);
+
+ /* Delay 100ms after reset to allow internal DMA logic to quiesce
+ * before removing DMA descriptor resources.
+ */
+ writeb(IOAT_CHANCMD_RESET,
+ chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+ mdelay(100);
+
+ spin_lock_bh(&ioat->desc_lock);
+ list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
+ dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
+ __func__, desc_id(desc));
+ dump_desc_dbg(ioat, desc);
+ in_use_descs++;
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->txd.phys);
+ kfree(desc);
+ }
+ list_for_each_entry_safe(desc, _desc,
+ &ioat->free_desc, node) {
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->txd.phys);
+ kfree(desc);
+ }
+ spin_unlock_bh(&ioat->desc_lock);
+
+ pci_pool_free(ioatdma_device->completion_pool,
+ chan->completion,
+ chan->completion_dma);
+
+ /* one is ok since we left it on there on purpose */
+ if (in_use_descs > 1)
+ dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+ in_use_descs - 1);
+
+ chan->last_completion = 0;
+ chan->completion_dma = 0;
+ ioat->pending = 0;
+ ioat->desccount = 0;
+}
+
+/**
+ * ioat1_dma_get_next_descriptor - return the next available descriptor
+ * @ioat: IOAT DMA channel handle
+ *
+ * Gets the next descriptor from the chain, and must be called with the
+ * channel's desc_lock held. Allocates more descriptors if the channel
+ * has run out.
+ */
+static struct ioat_desc_sw *
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
+{
+ struct ioat_desc_sw *new;
+
+ if (!list_empty(&ioat->free_desc)) {
+ new = to_ioat_desc(ioat->free_desc.next);
+ list_del(&new->node);
+ } else {
+ /* try to get another desc */
+ new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
+ if (!new) {
+ dev_err(to_dev(&ioat->base), "alloc failed\n");
+ return NULL;
+ }
+ }
+ dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
+ __func__, desc_id(new));
+ prefetch(new->hw);
+ return new;
+}
+
+static struct dma_async_tx_descriptor *
+ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_desc_sw *desc;
+ size_t copy;
+ LIST_HEAD(chain);
+ dma_addr_t src = dma_src;
+ dma_addr_t dest = dma_dest;
+ size_t total_len = len;
+ struct ioat_dma_descriptor *hw = NULL;
+ int tx_cnt = 0;
+
+ spin_lock_bh(&ioat->desc_lock);
+ desc = ioat1_dma_get_next_descriptor(ioat);
+ do {
+ if (!desc)
+ break;
+
+ tx_cnt++;
+ copy = min_t(size_t, len, ioat->xfercap);
+
+ hw = desc->hw;
+ hw->size = copy;
+ hw->ctl = 0;
+ hw->src_addr = src;
+ hw->dst_addr = dest;
+
+ list_add_tail(&desc->node, &chain);
+
+ len -= copy;
+ dest += copy;
+ src += copy;
+ if (len) {
+ struct ioat_desc_sw *next;
+
+ async_tx_ack(&desc->txd);
+ next = ioat1_dma_get_next_descriptor(ioat);
+ hw->next = next ? next->txd.phys : 0;
+ dump_desc_dbg(ioat, desc);
+ desc = next;
+ } else
+ hw->next = 0;
+ } while (len);
+
+ if (!desc) {
+ struct ioat_chan_common *chan = &ioat->base;
+
+ dev_err(to_dev(chan),
+ "chan%d - get_next_desc failed\n", chan_num(chan));
+ list_splice(&chain, &ioat->free_desc);
+ spin_unlock_bh(&ioat->desc_lock);
+ return NULL;
+ }
+ spin_unlock_bh(&ioat->desc_lock);
+
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ list_splice(&chain, &desc->tx_list);
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.compl_write = 1;
+ hw->tx_cnt = tx_cnt;
+ dump_desc_dbg(ioat, desc);
+
+ return &desc->txd;
+}
+
+static void ioat1_cleanup_tasklet(unsigned long data)
+{
+ struct ioat_dma_chan *chan = (void *)data;
+
+ ioat1_cleanup(chan);
+ writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+ size_t len, struct ioat_dma_descriptor *hw)
+{
+ struct pci_dev *pdev = chan->device->pdev;
+ size_t offset = len - hw->size;
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+ ioat_unmap(pdev, hw->dst_addr - offset, len,
+ PCI_DMA_FROMDEVICE, flags, 1);
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
+ ioat_unmap(pdev, hw->src_addr - offset, len,
+ PCI_DMA_TODEVICE, flags, 0);
+}
+
+unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
+{
+ unsigned long phys_complete;
+ u64 completion;
+
+ completion = *chan->completion;
+ phys_complete = ioat_chansts_to_addr(completion);
+
+ dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
+ (unsigned long long) phys_complete);
+
+ if (is_ioat_halted(completion)) {
+ u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
+ chanerr);
+
+ /* TODO do something to salvage the situation */
+ }
+
+ return phys_complete;
+}
+
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+ unsigned long *phys_complete)
+{
+ *phys_complete = ioat_get_current_completion(chan);
+ if (*phys_complete == chan->last_completion)
+ return false;
+ clear_bit(IOAT_COMPLETION_ACK, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ return true;
+}
+
+static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct list_head *_desc, *n;
+ struct dma_async_tx_descriptor *tx;
+
+ dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
+ __func__, phys_complete);
+ list_for_each_safe(_desc, n, &ioat->used_desc) {
+ struct ioat_desc_sw *desc;
+
+ prefetch(n);
+ desc = list_entry(_desc, typeof(*desc), node);
+ tx = &desc->txd;
+ /*
+ * Incoming DMA requests may use multiple descriptors,
+ * due to exceeding xfercap, perhaps. If so, only the
+ * last one will have a cookie, and require unmapping.
+ */
+ dump_desc_dbg(ioat, desc);
+ if (tx->cookie) {
+ chan->completed_cookie = tx->cookie;
+ tx->cookie = 0;
+ ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+ ioat->active -= desc->hw->tx_cnt;
+ if (tx->callback) {
+ tx->callback(tx->callback_param);
+ tx->callback = NULL;
+ }
+ }
+
+ if (tx->phys != phys_complete) {
+ /*
+ * a completed entry, but not the last, so clean
+ * up if the client is done with the descriptor
+ */
+ if (async_tx_test_ack(tx))
+ list_move_tail(&desc->node, &ioat->free_desc);
+ } else {
+ /*
+ * last used desc. Do not remove, so we can
+ * append from it.
+ */
+
+ /* if nothing else is pending, cancel the
+ * completion timeout
+ */
+ if (n == &ioat->used_desc) {
+ dev_dbg(to_dev(chan),
+ "%s cancel completion timeout\n",
+ __func__);
+ clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ }
+
+ /* TODO check status bits? */
+ break;
+ }
+ }
+
+ chan->last_completion = phys_complete;
+}
+
+/**
+ * ioat1_cleanup - clean up finished descriptors
+ * @ioat: ioat channel to be cleaned up
+ *
+ * To prevent lock contention we defer cleanup when the locks are
+ * contended with a terminal timeout that forces cleanup and catches
+ * completion notification errors.
+ */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+
+ prefetch(chan->completion);
+
+ if (!spin_trylock_bh(&chan->cleanup_lock))
+ return;
+
+ if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ if (!spin_trylock_bh(&ioat->desc_lock)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ __cleanup(ioat, phys_complete);
+
+ spin_unlock_bh(&ioat->desc_lock);
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static void ioat1_timer_event(unsigned long data)
+{
+ struct ioat_dma_chan *ioat = (void *) data;
+ struct ioat_chan_common *chan = &ioat->base;
+
+ dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state);
+
+ spin_lock_bh(&chan->cleanup_lock);
+ if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) {
+ struct ioat_desc_sw *desc;
+
+ spin_lock_bh(&ioat->desc_lock);
+
+ /* restart active descriptors */
+ desc = to_ioat_desc(ioat->used_desc.prev);
+ ioat_set_chainaddr(ioat, desc->txd.phys);
+ ioat_start(chan);
+
+ ioat->pending = 0;
+ set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ spin_unlock_bh(&ioat->desc_lock);
+ } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+ unsigned long phys_complete;
+
+ spin_lock_bh(&ioat->desc_lock);
+ /* if we haven't made progress and we have already
+ * acknowledged a pending completion once, then be more
+ * forceful with a restart
+ */
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+ else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+ ioat1_reset_channel(ioat);
+ else {
+ u64 status = ioat_chansts(chan);
+
+ /* manually update the last completion address */
+ if (ioat_chansts_to_addr(status) != 0)
+ *chan->completion = status;
+
+ set_bit(IOAT_COMPLETION_ACK, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ }
+ spin_unlock_bh(&ioat->desc_lock);
+ }
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static enum dma_status
+ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+ if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+ return DMA_SUCCESS;
+
+ ioat1_cleanup(ioat);
+
+ return ioat_is_complete(c, cookie, done, used);
+}
+
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_desc_sw *desc;
+ struct ioat_dma_descriptor *hw;
+
+ spin_lock_bh(&ioat->desc_lock);
+
+ desc = ioat1_dma_get_next_descriptor(ioat);
+
+ if (!desc) {
+ dev_err(to_dev(chan),
+ "Unable to start null desc - get next desc failed\n");
+ spin_unlock_bh(&ioat->desc_lock);
+ return;
+ }
+
+ hw = desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = 1;
+ hw->ctl_f.compl_write = 1;
+ /* set size to non-zero value (channel returns error when size is 0) */
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ hw->src_addr = 0;
+ hw->dst_addr = 0;
+ async_tx_ack(&desc->txd);
+ hw->next = 0;
+ list_add_tail(&desc->node, &ioat->used_desc);
+ dump_desc_dbg(ioat, desc);
+
+ ioat_set_chainaddr(ioat, desc->txd.phys);
+ ioat_start(chan);
+ spin_unlock_bh(&ioat->desc_lock);
+}
+
+/*
+ * Perform an IOAT transaction to verify the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static void __devinit ioat_dma_test_callback(void *dma_async_param)
+{
+ struct completion *cmp = dma_async_param;
+
+ complete(cmp);
+}
+
+/**
+ * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
+ * @device: device to be tested
+ */
+int __devinit ioat_dma_self_test(struct ioatdma_device *device)
+{
+ int i;
+ u8 *src;
+ u8 *dest;
+ struct dma_device *dma = &device->common;
+ struct device *dev = &device->pdev->dev;
+ struct dma_chan *dma_chan;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dest, dma_src;
+ dma_cookie_t cookie;
+ int err = 0;
+ struct completion cmp;
+ unsigned long tmo;
+ unsigned long flags;
+
+ src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+ dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < IOAT_TEST_SIZE; i++)
+ src[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(dma->channels.next, struct dma_chan,
+ device_node);
+ if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+ dev_err(dev, "selftest cannot allocate chan resource\n");
+ err = -ENODEV;
+ goto out;
+ }
+
+ dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+ dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+ flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
+ DMA_PREP_INTERRUPT;
+ tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
+ IOAT_TEST_SIZE, flags);
+ if (!tx) {
+ dev_err(dev, "Self-test prep failed, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test setup failed, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (tmo == 0 ||
+ dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
+ != DMA_SUCCESS) {
+ dev_err(dev, "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+ dev_err(dev, "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ dma->device_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+/*
+ * Module parameter naming the interrupt scheme ioat_dma_setup_interrupts()
+ * tries first; that function compares it against "msix",
+ * "msix-single-vector", "msi" and "intx".
+ */
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+ sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+ "set ioat interrupt style: msix (default), "
+ "msix-single-vector, msi, intx)");
+
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @device: ioat device
+ *
+ * Starts at the scheme named by the ioat_interrupt_style module parameter
+ * and falls back in the order: one MSI-X vector per channel, a single
+ * MSI-X vector, MSI, legacy INTx. On success the optional intr_quirk hook
+ * runs and the master interrupt enable bit is written to INTRCTRL.
+ *
+ * Returns 0 on success. On failure interrupt generation is disabled and a
+ * negative errno is returned (-EINVAL for an unknown style name).
+ */
+static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
+{
+	struct ioat_chan_common *chan;
+	struct pci_dev *pdev = device->pdev;
+	struct device *dev = &pdev->dev;
+	struct msix_entry *msix;
+	int i, j, msixcnt;
+	int err = -EINVAL;
+	u8 intrctrl = 0;
+
+	if (!strcmp(ioat_interrupt_style, "msix"))
+		goto msix;
+	if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
+		goto msix_single_vector;
+	if (!strcmp(ioat_interrupt_style, "msi"))
+		goto msi;
+	if (!strcmp(ioat_interrupt_style, "intx"))
+		goto intx;
+	dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
+	goto err_no_irq;
+
+msix:
+	/* The number of MSI-X vectors should equal the number of channels */
+	msixcnt = device->common.chancnt;
+	for (i = 0; i < msixcnt; i++)
+		device->msix_entries[i].entry = i;
+
+	err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
+	if (err < 0)
+		goto msi;
+	if (err > 0)
+		goto msix_single_vector;
+
+	for (i = 0; i < msixcnt; i++) {
+		msix = &device->msix_entries[i];
+		chan = ioat_chan_by_index(device, i);
+		err = devm_request_irq(dev, msix->vector,
+				       ioat_dma_do_interrupt_msix, 0,
+				       "ioat-msix", chan);
+		if (err) {
+			/* release the vectors requested so far */
+			for (j = 0; j < i; j++) {
+				msix = &device->msix_entries[j];
+				chan = ioat_chan_by_index(device, j);
+				devm_free_irq(dev, msix->vector, chan);
+			}
+			/*
+			 * MSI-X is still enabled with msixcnt vectors here;
+			 * switch it off so the single-vector attempt below
+			 * does not enable it a second time.
+			 */
+			pci_disable_msix(pdev);
+			goto msix_single_vector;
+		}
+	}
+	intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+	goto done;
+
+msix_single_vector:
+	msix = &device->msix_entries[0];
+	msix->entry = 0;
+	err = pci_enable_msix(pdev, device->msix_entries, 1);
+	if (err)
+		goto msi;
+
+	err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
+			       "ioat-msix", device);
+	if (err) {
+		pci_disable_msix(pdev);
+		goto msi;
+	}
+	goto done;
+
+msi:
+	err = pci_enable_msi(pdev);
+	if (err)
+		goto intx;
+
+	err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
+			       "ioat-msi", device);
+	if (err) {
+		pci_disable_msi(pdev);
+		goto intx;
+	}
+	goto done;
+
+intx:
+	err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
+			       IRQF_SHARED, "ioat-intx", device);
+	if (err)
+		goto err_no_irq;
+
+done:
+	if (device->intr_quirk)
+		device->intr_quirk(device);
+	intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+	writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
+	return 0;
+
+err_no_irq:
+	/* Disable all interrupt generation */
+	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+	dev_err(dev, "no usable interrupts\n");
+	return err;
+}
+
+/* Clear the whole INTRCTRL register so the device raises no interrupts. */
+static void ioat_disable_interrupts(struct ioatdma_device *device)
+{
+	void __iomem *intrctrl_reg = device->reg_base + IOAT_INTRCTRL_OFFSET;
+
+	writeb(0, intrctrl_reg);
+}
+
+/*
+ * ioat_probe - common bring-up shared by the hardware-version probes
+ * @device: ioat device with its enumerate_channels and self_test hooks set
+ *
+ * Creates the descriptor and completion pools, enumerates channels,
+ * advertises DMA_MEMCPY, sets up interrupts and runs the self test.
+ * Returns 0 on success; on failure everything created here is torn down
+ * and a negative errno is returned (-ENODEV when no channels are found).
+ */
+int __devinit ioat_probe(struct ioatdma_device *device)
+{
+ int err = -ENODEV;
+ struct dma_device *dma = &device->common;
+ struct pci_dev *pdev = device->pdev;
+ struct device *dev = &pdev->dev;
+
+ /* DMA coherent memory pool for DMA descriptor allocations */
+ device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
+ sizeof(struct ioat_dma_descriptor),
+ 64, 0);
+ if (!device->dma_pool) {
+ err = -ENOMEM;
+ goto err_dma_pool;
+ }
+
+ device->completion_pool = pci_pool_create("completion_pool", pdev,
+ sizeof(u64), SMP_CACHE_BYTES,
+ SMP_CACHE_BYTES);
+
+ if (!device->completion_pool) {
+ err = -ENOMEM;
+ goto err_completion_pool;
+ }
+
+ device->enumerate_channels(device);
+
+ dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+ dma->dev = &pdev->dev;
+
+ /* err still holds its initial -ENODEV on this path */
+ if (!dma->chancnt) {
+ dev_err(dev, "zero channels detected\n");
+ goto err_setup_interrupts;
+ }
+
+ err = ioat_dma_setup_interrupts(device);
+ if (err)
+ goto err_setup_interrupts;
+
+ err = device->self_test(device);
+ if (err)
+ goto err_self_test;
+
+ return 0;
+
+ /* unwind in reverse order of setup */
+err_self_test:
+ ioat_disable_interrupts(device);
+err_setup_interrupts:
+ pci_pool_destroy(device->completion_pool);
+err_completion_pool:
+ pci_pool_destroy(device->dma_pool);
+err_dma_pool:
+ return err;
+}
+
+/*
+ * ioat_register - register the device with the dmaengine core
+ *
+ * If registration fails, interrupts are disabled and both pci pools are
+ * destroyed before the error is returned.
+ */
+int __devinit ioat_register(struct ioatdma_device *device)
+{
+	int rc = dma_async_device_register(&device->common);
+
+	if (!rc)
+		return 0;
+
+	ioat_disable_interrupts(device);
+	pci_pool_destroy(device->completion_pool);
+	pci_pool_destroy(device->dma_pool);
+
+	return rc;
+}
+
+/*
+ * ioat1_intr_quirk - make the MSI enable bit in the PCI DMACTRL config
+ * register follow pdev->msi_enabled
+ */
+static void ioat1_intr_quirk(struct ioatdma_device *device)
+{
+	struct pci_dev *pdev = device->pdev;
+	u32 ctl;
+
+	pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &ctl);
+	ctl = pdev->msi_enabled ? (ctl | IOAT_PCI_DMACTRL_MSI_EN)
+				: (ctl & ~IOAT_PCI_DMACTRL_MSI_EN);
+	pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, ctl);
+}
+
+/* sysfs "ring_size": print the channel's desccount field */
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+	return sprintf(page, "%d\n", to_ioat_chan(c)->desccount);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+/* sysfs "ring_active": print the channel's active field */
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+	return sprintf(page, "%d\n", to_ioat_chan(c)->active);
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+/*
+ * sysfs "cap": print "copy" followed by one keyword for each optional
+ * operation type set in the device's capability mask.
+ */
+static ssize_t cap_show(struct dma_chan *c, char *page)
+{
+	struct dma_device *dev = c->device;
+	const char *pq = dma_has_cap(DMA_PQ, dev->cap_mask) ? " pq" : "";
+	const char *pq_val = dma_has_cap(DMA_PQ_VAL, dev->cap_mask) ? " pq_val" : "";
+	const char *xor = dma_has_cap(DMA_XOR, dev->cap_mask) ? " xor" : "";
+	const char *xor_val = dma_has_cap(DMA_XOR_VAL, dev->cap_mask) ? " xor_val" : "";
+	const char *fill = dma_has_cap(DMA_MEMSET, dev->cap_mask) ? " fill" : "";
+	const char *intr = dma_has_cap(DMA_INTERRUPT, dev->cap_mask) ? " intr" : "";
+
+	return sprintf(page, "copy%s%s%s%s%s%s\n",
+		       pq, pq_val, xor, xor_val, fill, intr);
+}
+struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
+
+/*
+ * sysfs "version": print the device version byte as "<high nibble>.<low
+ * nibble>".
+ */
+static ssize_t version_show(struct dma_chan *c, char *page)
+{
+	u8 ver = to_ioatdma_device(c->device)->version;
+
+	return sprintf(page, "%d.%d\n", ver >> 4, ver & 0xf);
+}
+struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
+
+/* NULL-terminated attribute list used as default_attrs of ioat1_ktype */
+static struct attribute *ioat1_attrs[] = {
+ &ring_size_attr.attr,
+ &ring_active_attr.attr,
+ &ioat_cap_attr.attr,
+ &ioat_version_attr.attr,
+ NULL,
+};
+
+/*
+ * sysfs show dispatcher: recover the ioat_sysfs_entry and the channel from
+ * the generic attribute/kobject and forward to the entry's show hook.
+ * Returns -EIO when the entry has no show hook.
+ */
+static ssize_t
+ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+	struct ioat_sysfs_entry *ent =
+		container_of(attr, struct ioat_sysfs_entry, attr);
+	struct ioat_chan_common *owner =
+		container_of(kobj, struct ioat_chan_common, kobj);
+
+	if (ent->show)
+		return ent->show(&owner->common, page);
+
+	return -EIO;
+}
+
+/* read-only sysfs operations: only a show handler is provided */
+struct sysfs_ops ioat_sysfs_ops = {
+ .show = ioat_attr_show,
+};
+
+/* kobject type passed to ioat_kobject_add() by ioat1_dma_probe() */
+static struct kobj_type ioat1_ktype = {
+ .sysfs_ops = &ioat_sysfs_ops,
+ .default_attrs = ioat1_attrs,
+};
+
+/*
+ * ioat_kobject_add - create a "quickdata" kobject under every channel
+ * @device: ioat device whose channels get the kobject
+ * @type: kobj_type supplying the attributes
+ *
+ * A failure is not fatal: it is logged, the kobject reference is dropped
+ * and IOAT_KOBJ_INIT_FAIL is set in the channel state.
+ */
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
+{
+	struct dma_chan *dchan;
+
+	list_for_each_entry(dchan, &device->common.channels, device_node) {
+		struct ioat_chan_common *chan = to_chan_common(dchan);
+		int rc;
+
+		rc = kobject_init_and_add(&chan->kobj, type,
+					  &dchan->dev->device.kobj,
+					  "quickdata");
+		if (!rc)
+			continue;
+
+		dev_warn(to_dev(chan),
+			 "sysfs init error (%d), continuing...\n", rc);
+		kobject_put(&chan->kobj);
+		set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
+	}
+}
+
+/*
+ * ioat_kobject_del - remove the per-channel kobjects
+ *
+ * Channels flagged IOAT_KOBJ_INIT_FAIL are skipped.
+ */
+void ioat_kobject_del(struct ioatdma_device *device)
+{
+	struct dma_chan *dchan;
+
+	list_for_each_entry(dchan, &device->common.channels, device_node) {
+		struct ioat_chan_common *chan = to_chan_common(dchan);
+
+		if (test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state))
+			continue;
+
+		kobject_del(&chan->kobj);
+		kobject_put(&chan->kobj);
+	}
+}
+
+/*
+ * ioat1_dma_probe - bring up a version 1 device
+ * @device: ioat device
+ * @dca: non-zero to also initialise direct cache access
+ *
+ * Installs the v1 hooks and dmaengine operations, runs the common probe,
+ * sets the TCP copybreak to 4096, registers with dmaengine and adds the
+ * sysfs kobjects. Returns 0 on success or the first error encountered.
+ */
+int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
+{
+	struct dma_device *dma = &device->common;
+	int rc;
+
+	/* version specific hooks used by ioat_probe() */
+	device->intr_quirk = ioat1_intr_quirk;
+	device->enumerate_channels = ioat1_enumerate_channels;
+	device->self_test = ioat_dma_self_test;
+
+	/* dmaengine operations */
+	dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
+	dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
+	dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
+	dma->device_is_tx_complete = ioat1_dma_is_complete;
+
+	rc = ioat_probe(device);
+	if (rc)
+		return rc;
+
+	ioat_set_tcp_copy_break(4096);
+
+	rc = ioat_register(device);
+	if (rc)
+		return rc;
+
+	ioat_kobject_add(device, &ioat1_ktype);
+
+	if (dca)
+		device->dca = ioat_dca_init(device->pdev, device->reg_base);
+
+	return 0;
+}
+
+/*
+ * ioat_dma_remove - tear down a device set up by the probe routines
+ * @device: ioat device
+ *
+ * Disables interrupt generation, removes the sysfs kobjects, unregisters
+ * from dmaengine, destroys both pci pools and resets the channel list.
+ */
+void __devexit ioat_dma_remove(struct ioatdma_device *device)
+{
+ struct dma_device *dma = &device->common;
+
+ ioat_disable_interrupts(device);
+
+ ioat_kobject_del(device);
+
+ dma_async_device_unregister(dma);
+
+ pci_pool_destroy(device->dma_pool);
+ pci_pool_destroy(device->completion_pool);
+
+ INIT_LIST_HEAD(&dma->channels);
+}
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
new file mode 100644
index 00000000000..c14fdfeb7f3
--- /dev/null
+++ b/drivers/dma/ioat/dma.h
@@ -0,0 +1,337 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include "hw.h"
+#include "registers.h"
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+#include <net/tcp.h>
+
+#define IOAT_DMA_VERSION "4.00"
+
+#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
+#define IOAT_DMA_DCA_ANY_CPU ~0
+
+/* embedded struct dma_device -> containing struct ioatdma_device */
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
+/* list node -> containing struct ioat_desc_sw */
+#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
+/* generic tx descriptor -> containing struct ioat_desc_sw */
+#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd)
+/* struct device of the PCI function that owns a struct ioat_chan_common */
+#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev)
+
+/*
+ * byte offset of the channel's registers from the device register base,
+ * divided by 0x80
+ */
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+/**
+ * struct ioatdma_device - internal representation of a IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @dma_pool: for allocating DMA descriptors
+ * @completion_pool: pci pool handing out u64-sized blocks
+ * (NOTE(review): presumably backs each channel's completion area - confirm)
+ * @common: embedded struct dma_device
+ * @version: version of ioatdma device
+ * @msix_entries: irq handlers
+ * @idx: per channel data, looked up by channel index
+ * @dca: direct cache access context
+ * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
+ * @enumerate_channels: hw version specific channel enumeration
+ * @cleanup_tasklet: select between the v2 and v3 cleanup routines
+ * @timer_fn: select between the v2 and v3 timer watchdog routines
+ * @self_test: hardware version specific self test for each supported op type
+ *
+ * Note: the v3 cleanup routine supports raid operations
+ */
+struct ioatdma_device {
+ struct pci_dev *pdev;
+ void __iomem *reg_base;
+ struct pci_pool *dma_pool;
+ struct pci_pool *completion_pool;
+ struct dma_device common;
+ u8 version;
+ struct msix_entry msix_entries[4];
+ struct ioat_chan_common *idx[4];
+ struct dca_provider *dca;
+ void (*intr_quirk)(struct ioatdma_device *device);
+ int (*enumerate_channels)(struct ioatdma_device *device);
+ void (*cleanup_tasklet)(unsigned long data);
+ void (*timer_fn)(unsigned long data);
+ int (*self_test)(struct ioatdma_device *device);
+};
+
+/*
+ * struct ioat_chan_common - channel state embedded at the start of the
+ * version specific channel structures (see "base" in struct ioat_dma_chan)
+ *
+ * The IOAT_* values defined next to @state are bit numbers used with the
+ * set_bit()/test_bit() family; the *_TIMEOUT / RESET_DELAY values are
+ * intervals, in jiffies, used when re-arming @timer.
+ */
+struct ioat_chan_common {
+ struct dma_chan common;
+ void __iomem *reg_base;
+ unsigned long last_completion;
+ spinlock_t cleanup_lock;
+ dma_cookie_t completed_cookie;
+ unsigned long state;
+ #define IOAT_COMPLETION_PENDING 0
+ #define IOAT_COMPLETION_ACK 1
+ #define IOAT_RESET_PENDING 2
+ #define IOAT_KOBJ_INIT_FAIL 3
+ struct timer_list timer;
+ #define COMPLETION_TIMEOUT msecs_to_jiffies(100)
+ #define IDLE_TIMEOUT msecs_to_jiffies(2000)
+ #define RESET_DELAY msecs_to_jiffies(100)
+ struct ioatdma_device *device;
+ dma_addr_t completion_dma;
+ u64 *completion;
+ struct tasklet_struct cleanup_task;
+ struct kobject kobj;
+};
+
+/* sysfs attribute paired with a show callback that takes the dma channel */
+struct ioat_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct dma_chan *, char *);
+};
+
+/**
+ * struct ioat_dma_chan - internal representation of a DMA channel
+ * @base: common channel state
+ * @xfercap: XFERCAP register value expanded out
+ * @desc_lock: taken around descriptor list manipulation
+ * @free_desc: descriptor list (NOTE(review): presumably unused descriptors)
+ * @used_desc: descriptors queued to the channel are appended here
+ * @pending: NOTE(review): looks like a count of not-yet-issued descriptors
+ * @desccount: value reported by the ring_size sysfs attribute
+ * @active: value reported by the ring_active sysfs attribute
+ */
+struct ioat_dma_chan {
+ struct ioat_chan_common base;
+
+ size_t xfercap; /* XFERCAP register value expanded out */
+
+ spinlock_t desc_lock;
+ struct list_head free_desc;
+ struct list_head used_desc;
+
+ int pending;
+ u16 desccount;
+ u16 active;
+};
+
+/* generic dma_chan -> the struct ioat_chan_common that embeds it */
+static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
+{
+ return container_of(c, struct ioat_chan_common, common);
+}
+
+/* generic dma_chan -> the struct ioat_dma_chan whose base embeds it */
+static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
+{
+	return container_of(to_chan_common(c), struct ioat_dma_chan, base);
+}
+
+/**
+ * ioat_is_complete - poll the status of an ioat transaction
+ * @c: channel handle
+ * @cookie: transaction identifier
+ * @done: if set, updated with last completed transaction
+ * @used: if set, updated with last used transaction
+ *
+ * Only compares cookies; it does not touch the hardware.
+ */
+static inline enum dma_status
+ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+		 dma_cookie_t *done, dma_cookie_t *used)
+{
+	dma_cookie_t newest = c->cookie;
+	dma_cookie_t finished = to_chan_common(c)->completed_cookie;
+
+	if (done)
+		*done = finished;
+	if (used)
+		*used = newest;
+
+	return dma_async_is_complete(cookie, finished, newest);
+}
+
+/* wrapper around hardware descriptor format + additional software fields */
+
+/**
+ * struct ioat_desc_sw - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @node: this descriptor will either be on the free list,
+ * or attached to a transaction list (tx_list)
+ * @len: NOTE(review): presumably the transfer length in bytes - confirm
+ * @tx_list: NOTE(review): presumably the descriptors making up one
+ * transaction - confirm against the users in dma.c
+ * @txd: the generic software descriptor for all engines
+ * @id: identifier for debug (only present when DEBUG is defined)
+ */
+struct ioat_desc_sw {
+ struct ioat_dma_descriptor *hw;
+ struct list_head node;
+ size_t len;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ #ifdef DEBUG
+ int id;
+ #endif
+};
+
+/*
+ * Accessors for the debug-only descriptor id: without DEBUG, set_desc_id()
+ * expands to nothing and desc_id() evaluates to 0.
+ */
+#ifdef DEBUG
+#define set_desc_id(desc, i) ((desc)->id = (i))
+#define desc_id(desc) ((desc)->id)
+#else
+#define set_desc_id(desc, i)
+#define desc_id(desc) (0)
+#endif
+
+/*
+ * __dump_desc_dbg - emit one dev_dbg() line describing a descriptor
+ * @chan: channel owning the descriptor (selects the struct device to log to)
+ * @hw: hardware descriptor; next, ctl and the op/int_en/compl_write
+ * control bits are printed
+ * @tx: software descriptor; phys, cookie and flags are printed
+ * @id: debug identifier printed as desc[id]
+ */
+static inline void
+__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
+ struct dma_async_tx_descriptor *tx, int id)
+{
+ struct device *dev = to_dev(chan);
+
+ dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
+ " ctl: %#x (op: %d int_en: %d compl: %d)\n", id,
+ (unsigned long long) tx->phys,
+ (unsigned long long) hw->next, tx->cookie, tx->flags,
+ hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
+}
+
+/*
+ * dump_desc_dbg - log descriptor @d of channel @c when @d is non-NULL
+ *
+ * Both arguments are parenthesized so that expression arguments expand
+ * correctly. The statement expression evaluates to 0.
+ */
+#define dump_desc_dbg(c, d) \
+	({ if (d) __dump_desc_dbg(&(c)->base, (d)->hw, &(d)->txd, desc_id(d)); 0; })
+
+/*
+ * Set sysctl_tcp_dma_copybreak to @copybreak; compiles to nothing when
+ * CONFIG_NET_DMA is not enabled.
+ */
+static inline void ioat_set_tcp_copy_break(unsigned long copybreak)
+{
+ #ifdef CONFIG_NET_DMA
+ sysctl_tcp_dma_copybreak = copybreak;
+ #endif
+}
+
+/*
+ * Return the channel stored in device->idx[@index]. No bounds check is
+ * done here; idx[] is declared with 4 entries.
+ */
+static inline struct ioat_chan_common *
+ioat_chan_by_index(struct ioatdma_device *device, int index)
+{
+ return device->idx[index];
+}
+
+/*
+ * ioat_chansts - read the 64-bit channel status register
+ *
+ * The value is assembled from two 32-bit reads at version dependent
+ * offsets; the low half must be read first.
+ */
+static inline u64 ioat_chansts(struct ioat_chan_common *chan)
+{
+ u8 ver = chan->device->version;
+ u64 status;
+ u32 status_lo;
+
+ /* We need to read the low address first as this causes the
+ * chipset to latch the upper bits for the subsequent read
+ */
+ status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
+ status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
+ status <<= 32;
+ status |= status_lo;
+
+ return status;
+}
+
+/* Write the START command to the channel's version-specific command register. */
+static inline void ioat_start(struct ioat_chan_common *chan)
+{
+	u8 ver = chan->device->version;
+	void __iomem *cmd_reg = chan->reg_base + IOAT_CHANCMD_OFFSET(ver);
+
+	writeb(IOAT_CHANCMD_START, cmd_reg);
+}
+
+/* Mask a channel status value down to its completed-descriptor address bits. */
+static inline u64 ioat_chansts_to_addr(u64 status)
+{
+ return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+}
+
+/* Read the channel's 32-bit error register. */
+static inline u32 ioat_chanerr(struct ioat_chan_common *chan)
+{
+ return readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+}
+
+/* Write the SUSPEND command to the channel's version-specific command register. */
+static inline void ioat_suspend(struct ioat_chan_common *chan)
+{
+	u8 ver = chan->device->version;
+	void __iomem *cmd_reg = chan->reg_base + IOAT_CHANCMD_OFFSET(ver);
+
+	writeb(IOAT_CHANCMD_SUSPEND, cmd_reg);
+}
+
+/*
+ * Program the v1 chain address registers with @addr: the low 32 bits are
+ * written first, then the high 32 bits.
+ */
+static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+
+ writel(addr & 0x00000000FFFFFFFF,
+ chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+ writel(addr >> 32,
+ chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+}
+
+/* true when the status field of @status reads IOAT_CHANSTS_ACTIVE */
+static inline bool is_ioat_active(unsigned long status)
+{
+	unsigned long field = status & IOAT_CHANSTS_STATUS;
+
+	return field == IOAT_CHANSTS_ACTIVE;
+}
+
+/* true when the status field of @status reads IOAT_CHANSTS_DONE */
+static inline bool is_ioat_idle(unsigned long status)
+{
+	unsigned long field = status & IOAT_CHANSTS_STATUS;
+
+	return field == IOAT_CHANSTS_DONE;
+}
+
+/* true when the status field of @status reads IOAT_CHANSTS_HALTED */
+static inline bool is_ioat_halted(unsigned long status)
+{
+	unsigned long field = status & IOAT_CHANSTS_STATUS;
+
+	return field == IOAT_CHANSTS_HALTED;
+}
+
+/* true when the status field of @status reads IOAT_CHANSTS_SUSPENDED */
+static inline bool is_ioat_suspended(unsigned long status)
+{
+	unsigned long field = status & IOAT_CHANSTS_STATUS;
+
+	return field == IOAT_CHANSTS_SUSPENDED;
+}
+
+/*
+ * channel was fatally programmed: true when @err has any of the source,
+ * destination or next address, control or length error bits set
+ */
+static inline bool is_ioat_bug(unsigned long err)
+{
+	const unsigned long fatal = IOAT_CHANERR_SRC_ADDR_ERR |
+				    IOAT_CHANERR_DEST_ADDR_ERR |
+				    IOAT_CHANERR_NEXT_ADDR_ERR |
+				    IOAT_CHANERR_CONTROL_ERR |
+				    IOAT_CHANERR_LENGTH_ERR;
+
+	return (err & fatal) != 0;
+}
+
+/*
+ * ioat_unmap - undo one DMA mapping of a completed descriptor
+ * @dst: true for the destination buffer, false for the source buffer
+ *
+ * Uses pci_unmap_single() when the *_UNMAP_SINGLE flag for that side is set
+ * in @flags, pci_unmap_page() otherwise.
+ */
+static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
+			      int direction, enum dma_ctrl_flags flags, bool dst)
+{
+	bool single;
+
+	if (dst)
+		single = !!(flags & DMA_COMPL_DEST_UNMAP_SINGLE);
+	else
+		single = !!(flags & DMA_COMPL_SRC_UNMAP_SINGLE);
+
+	if (single) {
+		pci_unmap_single(pdev, addr, len, direction);
+		return;
+	}
+
+	pci_unmap_page(pdev, addr, len, direction);
+}
+
+/* entry points and sysfs objects shared between the ioat source files */
+int __devinit ioat_probe(struct ioatdma_device *device);
+int __devinit ioat_register(struct ioatdma_device *device);
+int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat_dma_self_test(struct ioatdma_device *device);
+void __devexit ioat_dma_remove(struct ioatdma_device *device);
+struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
+ void __iomem *iobase);
+unsigned long ioat_get_current_completion(struct ioat_chan_common *chan);
+void ioat_init_channel(struct ioatdma_device *device,
+ struct ioat_chan_common *chan, int idx,
+ void (*timer_fn)(unsigned long),
+ void (*tasklet)(unsigned long),
+ unsigned long ioat);
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+ size_t len, struct ioat_dma_descriptor *hw);
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+ unsigned long *phys_complete);
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
+void ioat_kobject_del(struct ioatdma_device *device);
+extern struct sysfs_ops ioat_sysfs_ops;
+extern struct ioat_sysfs_entry ioat_version_attr;
+extern struct ioat_sysfs_entry ioat_cap_attr;
+#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
new file mode 100644
index 00000000000..96ffab7d37a
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.c
@@ -0,0 +1,871 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine (versions >= 2), which
+ * does asynchronous data movement and checksumming operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+/* log2 of the number of descriptors allocated per ioat2+ channel */
+int ioat_ring_alloc_order = 8;
+module_param(ioat_ring_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_alloc_order,
+ "ioat2+: allocate 2^n descriptors per channel"
+ " (default: 8 max: 16)");
+/* upper bound on the ring order, initialised to IOAT_MAX_ORDER */
+static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
+module_param(ioat_ring_max_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_max_alloc_order,
+ "ioat2+: upper limit for ring size (default: 16)");
+
+/*
+ * __ioat2_issue_pending - hand all prepared descriptors to the hardware
+ * @ioat: ioat2+ channel
+ *
+ * Clears the pending flag, adds the number of pending ring entries to
+ * dmacount, marks everything up to head as issued and writes the new count
+ * to the channel's DMACOUNT register. The callers in this file invoke it
+ * with ring_lock held.
+ */
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
+{
+	/* __iomem qualifies the pointed-to memory, so it precedes the '*' */
+	void __iomem *reg_base = ioat->base.reg_base;
+
+	ioat->pending = 0;
+	ioat->dmacount += ioat2_ring_pending(ioat);
+	ioat->issued = ioat->head;
+	/* make descriptor updates globally visible before notifying channel */
+	wmb();
+	writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+	dev_dbg(to_dev(&ioat->base),
+		"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+		__func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+}
+
+/*
+ * ioat2_issue_pending - issue queued descriptors on @chan
+ *
+ * Takes ring_lock and issues only when pending == 1; any other value of
+ * pending leaves the hardware untouched.
+ */
+void ioat2_issue_pending(struct dma_chan *chan)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
+
+ spin_lock_bh(&ioat->ring_lock);
+ if (ioat->pending == 1)
+ __ioat2_issue_pending(ioat);
+ spin_unlock_bh(&ioat->ring_lock);
+}
+
+/**
+ * ioat2_update_pending - log pending descriptors
+ * @ioat: ioat2+ channel
+ *
+ * pending == 2 means submission is temporarily blocked by an in-flight
+ * reset, in which case nothing is done. Otherwise the descriptors are
+ * issued right away when more than ioat_pending_level of them are
+ * outstanding, and pending is set to '1' when they are not.
+ *
+ * called with ring_lock held
+ */
+static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
+{
+	if (unlikely(ioat->pending == 2))
+		return;
+
+	if (ioat2_ring_pending(ioat) > ioat_pending_level) {
+		__ioat2_issue_pending(ioat);
+		return;
+	}
+
+	ioat->pending = 1;
+}
+
+/*
+ * __ioat2_start_null_desc - queue a null descriptor and issue it
+ * @ioat: ioat2+ channel
+ *
+ * Allocates one ring entry, programs it as a null operation with interrupt
+ * and completion write enabled, points the chain address at it and issues
+ * pending work. Logs an error and does nothing when the ring has no free
+ * entry. Does no locking of its own; ioat2_start_null_desc() wraps it in
+ * ring_lock.
+ */
+static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_ring_ent *desc;
+ struct ioat_dma_descriptor *hw;
+ int idx;
+
+ if (ioat2_ring_space(ioat) < 1) {
+ dev_err(to_dev(&ioat->base),
+ "Unable to start null desc - ring full\n");
+ return;
+ }
+
+ dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued);
+ idx = ioat2_desc_alloc(ioat, 1);
+ desc = ioat2_get_ring_ent(ioat, idx);
+
+ hw = desc->hw;
+ /* clear every control bit, then enable only what a null op needs */
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = 1;
+ hw->ctl_f.compl_write = 1;
+ /* set size to non-zero value (channel returns error when size is 0) */
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ hw->src_addr = 0;
+ hw->dst_addr = 0;
+ async_tx_ack(&desc->txd);
+ ioat2_set_chainaddr(ioat, desc->txd.phys);
+ dump_desc_dbg(ioat, desc);
+ __ioat2_issue_pending(ioat);
+}
+
+/* Locked wrapper: run __ioat2_start_null_desc() with ring_lock held. */
+static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+ spin_lock_bh(&ioat->ring_lock);
+ __ioat2_start_null_desc(ioat);
+ spin_unlock_bh(&ioat->ring_lock);
+}
+
+/*
+ * __cleanup - retire ring entries up to the last completed descriptor
+ * @ioat: ioat2+ channel
+ * @phys_complete: bus address of the last descriptor reported complete
+ *
+ * Walks the active entries starting at tail. Every entry that still
+ * carries a cookie is unmapped, recorded as completed_cookie, has its
+ * cookie cleared and its callback (if any) run once. The walk stops after
+ * the entry whose address equals @phys_complete, and tail is advanced past
+ * everything visited. When the ring becomes empty the completion-pending
+ * state is cleared and the timer is re-armed with IDLE_TIMEOUT.
+ *
+ * Does no locking itself; the callers in this file hold ring_lock.
+ */
+static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct dma_async_tx_descriptor *tx;
+ struct ioat_ring_ent *desc;
+ bool seen_current = false;
+ u16 active;
+ int i;
+
+ dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued);
+
+ active = ioat2_ring_active(ioat);
+ for (i = 0; i < active && !seen_current; i++) {
+ prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
+ desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+ tx = &desc->txd;
+ dump_desc_dbg(ioat, desc);
+ /* a zero cookie means this entry has nothing left to complete */
+ if (tx->cookie) {
+ ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+ chan->completed_cookie = tx->cookie;
+ tx->cookie = 0;
+ if (tx->callback) {
+ tx->callback(tx->callback_param);
+ tx->callback = NULL;
+ }
+ }
+
+ if (tx->phys == phys_complete)
+ seen_current = true;
+ }
+ ioat->tail += i;
+ BUG_ON(!seen_current); /* no active descs have written a completion? */
+
+ chan->last_completion = phys_complete;
+ if (ioat->head == ioat->tail) {
+ dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+ __func__);
+ clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+}
+
+/**
+ * ioat2_cleanup - clean finished descriptors (advance tail pointer)
+ * @ioat: ioat channel to be cleaned up
+ *
+ * Opportunistic: both cleanup_lock and ring_lock are taken with trylock,
+ * and the function returns without cleaning when either lock is contended
+ * or ioat_cleanup_preamble() returns false.
+ */
+static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+
+ prefetch(chan->completion);
+
+ if (!spin_trylock_bh(&chan->cleanup_lock))
+ return;
+
+ if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ if (!spin_trylock_bh(&ioat->ring_lock)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ __cleanup(ioat, phys_complete);
+
+ spin_unlock_bh(&ioat->ring_lock);
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+/*
+ * Tasklet body: @data is the struct ioat2_dma_chan pointer. Runs one
+ * cleanup pass, then writes IOAT_CHANCTRL_RUN to the channel control
+ * register.
+ */
+void ioat2_cleanup_tasklet(unsigned long data)
+{
+ struct ioat2_dma_chan *ioat = (void *) data;
+
+ ioat2_cleanup(ioat);
+ writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+/*
+ * __ioat2_restart_chan - re-issue outstanding work after a channel stop
+ * @ioat: ioat2+ channel
+ *
+ * Rewinds issued to tail and zeroes dmacount so every entry between tail
+ * and head counts as pending again, marks a completion as pending and arms
+ * the completion timer. If there is pending work the chain address is
+ * pointed at the tail entry and the work is issued; otherwise a null
+ * descriptor is started.
+ */
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+
+ /* set the tail to be re-issued */
+ ioat->issued = ioat->tail;
+ ioat->dmacount = 0;
+ set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ dev_dbg(to_dev(chan),
+ "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+
+ if (ioat2_ring_pending(ioat)) {
+ struct ioat_ring_ent *desc;
+
+ desc = ioat2_get_ring_ent(ioat, ioat->tail);
+ ioat2_set_chainaddr(ioat, desc->txd.phys);
+ __ioat2_issue_pending(ioat);
+ } else
+ __ioat2_start_null_desc(ioat);
+}
+
+/*
+ * ioat2_restart_channel - quiesce the channel, reap completions, restart
+ * @ioat: ioat2+ channel
+ *
+ * Suspends the channel if it is active or idle and spins until it leaves
+ * both states, cleans up whatever has completed and then re-issues the
+ * remaining work through __ioat2_restart_chan(). Its caller in this file,
+ * ioat2_timer_event(), holds cleanup_lock and ring_lock.
+ */
+static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	unsigned long phys_complete;
+	/* ioat_chansts() returns u64; keep the full width like other callers */
+	u64 status;
+
+	status = ioat_chansts(chan);
+	if (is_ioat_active(status) || is_ioat_idle(status))
+		ioat_suspend(chan);
+	while (is_ioat_active(status) || is_ioat_idle(status)) {
+		status = ioat_chansts(chan);
+		cpu_relax();
+	}
+
+	if (ioat_cleanup_preamble(chan, &phys_complete))
+		__cleanup(ioat, phys_complete);
+
+	__ioat2_restart_chan(ioat);
+}
+
+/*
+ * ioat2_timer_event - channel watchdog / idle timer handler
+ * @data: the struct ioat2_dma_chan pointer
+ *
+ * With a completion pending: BUGs on a halted channel that reports a
+ * programming error, cleans up if progress was made, restarts the channel
+ * if a stall was already acknowledged once, and otherwise records the
+ * acknowledgement and re-arms the completion timeout.
+ *
+ * With nothing pending: shrinks an idle, oversized ring by one order and
+ * keeps re-arming the idle timer until the default order is reached.
+ */
+void ioat2_timer_event(unsigned long data)
+{
+ struct ioat2_dma_chan *ioat = (void *) data;
+ struct ioat_chan_common *chan = &ioat->base;
+
+ spin_lock_bh(&chan->cleanup_lock);
+ if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+ unsigned long phys_complete;
+ u64 status;
+
+ spin_lock_bh(&ioat->ring_lock);
+ status = ioat_chansts(chan);
+
+ /* when halted due to errors check for channel
+ * programming errors before advancing the completion state
+ */
+ if (is_ioat_halted(status)) {
+ u32 chanerr;
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ BUG_ON(is_ioat_bug(chanerr));
+ }
+
+ /* if we haven't made progress and we have already
+ * acknowledged a pending completion once, then be more
+ * forceful with a restart
+ */
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+ else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+ ioat2_restart_channel(ioat);
+ else {
+ set_bit(IOAT_COMPLETION_ACK, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ }
+ spin_unlock_bh(&ioat->ring_lock);
+ } else {
+ u16 active;
+
+ /* if the ring is idle, empty, and oversized try to step
+ * down the size
+ */
+ spin_lock_bh(&ioat->ring_lock);
+ active = ioat2_ring_active(ioat);
+ if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+ reshape_ring(ioat, ioat->alloc_order-1);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ /* keep shrinking until we get back to our minimum
+ * default size
+ */
+ if (ioat->alloc_order > ioat_get_alloc_order())
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+/**
+ * ioat2_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ *
+ * Reads the channel count (clamped to the size of device->idx) and the
+ * transfer capability from the hardware, then allocates and initialises
+ * one struct ioat2_dma_chan per channel.
+ *
+ * Returns the number of channels set up, which is also stored in
+ * dma->chancnt. Both are 0 when the hardware reports no transfer
+ * capability, so callers checking chancnt see that no channel exists.
+ */
+int ioat2_enumerate_channels(struct ioatdma_device *device)
+{
+	struct ioat2_dma_chan *ioat;
+	struct device *dev = &device->pdev->dev;
+	struct dma_device *dma = &device->common;
+	u8 xfercap_log;
+	int i;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+	dma->chancnt &= 0x1f; /* bits [4:0] valid */
+	if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+		dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+			 dma->chancnt, ARRAY_SIZE(device->idx));
+		dma->chancnt = ARRAY_SIZE(device->idx);
+	}
+	xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+	xfercap_log &= 0x1f; /* bits [4:0] valid */
+	if (xfercap_log == 0) {
+		/* no channel was created: do not leave a stale count behind */
+		dma->chancnt = 0;
+		return 0;
+	}
+	dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
+
+	/* FIXME which i/oat version is i7300? */
+#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
+	/* give one channel away, but never decrement a zero count */
+	if (i7300_idle_platform_probe(NULL, NULL, 1) == 0 && dma->chancnt)
+		dma->chancnt--;
+#endif
+	for (i = 0; i < dma->chancnt; i++) {
+		ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+		if (!ioat)
+			break;
+
+		ioat_init_channel(device, &ioat->base, i,
+				  device->timer_fn,
+				  device->cleanup_tasklet,
+				  (unsigned long) ioat);
+		ioat->xfercap_log = xfercap_log;
+		spin_lock_init(&ioat->ring_lock);
+	}
+	/* an allocation failure may have stopped the loop early */
+	dma->chancnt = i;
+	return i;
+}
+
+/* Assign the next cookie to @tx, arm the completion timer unless a
+ * completion was already pending, update the pending state and drop the
+ * ring_lock (this function unlocks it without taking it, so the caller
+ * must hold it on entry).
+ */
+static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_chan *c = tx->chan;
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_chan_common *chan = &ioat->base;
+	dma_cookie_t next = c->cookie + 1;
+
+	/* restart the sequence at 1 once the counter goes negative */
+	if (next < 0)
+		next = 1;
+	tx->cookie = next;
+	c->cookie = next;
+	dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, next);
+
+	if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+	ioat2_update_pending(ioat);
+	spin_unlock_bh(&ioat->ring_lock);
+
+	return next;
+}
+
+/* Allocate one ring entry: a zeroed hardware descriptor from the device's
+ * dma_pool plus a zeroed software wrapper from ioat2_cache.  Returns NULL
+ * if either allocation fails (the hardware descriptor is given back in
+ * that case).
+ */
+static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+{
+	struct ioatdma_device *dma = to_ioatdma_device(chan->device);
+	struct ioat_dma_descriptor *hw;
+	struct ioat_ring_ent *desc;
+	dma_addr_t phys;
+
+	hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
+	if (!hw)
+		return NULL;
+	memset(hw, 0, sizeof(*hw));
+
+	desc = kmem_cache_alloc(ioat2_cache, flags);
+	if (!desc)
+		goto err_free_hw;
+	memset(desc, 0, sizeof(*desc));
+
+	dma_async_tx_descriptor_init(&desc->txd, chan);
+	desc->txd.tx_submit = ioat2_tx_submit_unlock;
+	desc->hw = hw;
+	desc->txd.phys = phys;
+	return desc;
+
+err_free_hw:
+	pci_pool_free(dma->dma_pool, hw, phys);
+	return NULL;
+}
+
+/* Release a ring entry: the hardware descriptor goes back to the device's
+ * dma_pool, the software wrapper back to ioat2_cache.
+ */
+static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
+{
+	struct ioatdma_device *dma = to_ioatdma_device(chan->device);
+
+	pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
+	kmem_cache_free(ioat2_cache, desc);
+}
+
+/* Allocate a software ring of 2^order entries and chain the hardware
+ * descriptors into a circle.  Returns NULL if @order exceeds the maximum
+ * allocation order or if any allocation fails; entries allocated before
+ * the failure are released again.
+ */
+static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
+{
+	struct ioat_ring_ent **ring;
+	int descs = 1 << order;
+	int n;
+
+	if (order > ioat_get_max_alloc_order())
+		return NULL;
+
+	/* allocate the array to hold the software ring */
+	ring = kcalloc(descs, sizeof(*ring), flags);
+	if (!ring)
+		return NULL;
+
+	for (n = 0; n < descs; n++) {
+		ring[n] = ioat2_alloc_ring_ent(c, flags);
+		if (!ring[n])
+			goto err_unwind;
+		set_desc_id(ring[n], n);
+	}
+
+	/* link descs: every entry points at its successor, the last one
+	 * wraps around to the first (descs is a power of two)
+	 */
+	for (n = 0; n < descs; n++) {
+		struct ioat_ring_ent *next = ring[(n + 1) & (descs - 1)];
+
+		ring[n]->hw->next = next->txd.phys;
+	}
+
+	return ring;
+
+err_unwind:
+	while (n--)
+		ioat2_free_ring_ent(ring[n], c);
+	kfree(ring);
+	return NULL;
+}
+
+/**
+ * ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
+ * @c: channel to be initialized
+ *
+ * Returns the number of descriptors in the ring (also when the channel was
+ * already set up), or -ENOMEM if the completion writeback area or the ring
+ * cannot be allocated.
+ */
+int ioat2_alloc_chan_resources(struct dma_chan *c)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioat_ring_ent **ring;
+	u32 chanerr;
+	int order;
+
+	/* have we already been set up? */
+	if (ioat->ring)
+		return 1 << ioat->alloc_order;
+
+	/* Setup register to interrupt and write completion status on error */
+	writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+	/* writing the set bits back clears any stale channel errors */
+	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+	if (chanerr) {
+		dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
+		writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+	}
+
+	/* allocate a completion writeback area */
+	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+	chan->completion = pci_pool_alloc(chan->device->completion_pool,
+					  GFP_KERNEL, &chan->completion_dma);
+	if (!chan->completion)
+		return -ENOMEM;
+
+	memset(chan->completion, 0, sizeof(*chan->completion));
+	writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+	       chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+	writel(((u64) chan->completion_dma) >> 32,
+	       chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+	order = ioat_get_alloc_order();
+	ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
+	if (!ring) {
+		/* don't leak the completion area: ioat2_free_chan_resources()
+		 * returns early when there is no ring, and the next call
+		 * here would overwrite the pointer with a new allocation.
+		 * The CHANCMP registers are reprogrammed on that next call.
+		 */
+		pci_pool_free(chan->device->completion_pool, chan->completion,
+			      chan->completion_dma);
+		chan->completion = NULL;
+		chan->completion_dma = 0;
+		return -ENOMEM;
+	}
+
+	spin_lock_bh(&ioat->ring_lock);
+	ioat->ring = ring;
+	ioat->head = 0;
+	ioat->issued = 0;
+	ioat->tail = 0;
+	ioat->pending = 0;
+	ioat->alloc_order = order;
+	spin_unlock_bh(&ioat->ring_lock);
+
+	tasklet_enable(&chan->cleanup_task);
+	ioat2_start_null_desc(ioat);
+
+	return 1 << ioat->alloc_order;
+}
+
+/**
+ * reshape_ring - grow or shrink the software descriptor ring
+ * @ioat: channel whose ring is resized
+ * @order: log2 of the requested ring size
+ *
+ * Reshape differs from normal ring allocation in that we want to allocate
+ * a new software ring while only extending/truncating the hardware ring.
+ * Allocations use GFP_NOWAIT; the callers visible in this file invoke it
+ * with ioat->ring_lock held.
+ *
+ * Returns true once the ring has 2^order entries.  Returns false, with the
+ * existing ring left in place, if @order is too large, the ring has no
+ * free descriptor, the active set would not fit, or an allocation fails.
+ */
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	struct dma_chan *c = &chan->common;
+	const u16 curr_size = ioat2_ring_mask(ioat) + 1;
+	const u16 active = ioat2_ring_active(ioat);
+	const u16 new_size = 1 << order;
+	struct ioat_ring_ent **ring;
+	u16 i;
+
+	if (order > ioat_get_max_alloc_order())
+		return false;
+
+	/* double check that we have at least 1 free descriptor */
+	if (active == curr_size)
+		return false;
+
+	/* when shrinking, verify that we can hold the current active
+	 * set in the new ring
+	 */
+	if (active >= new_size)
+		return false;
+
+	/* allocate the array to hold the software ring */
+	ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
+	if (!ring)
+		return false;
+
+	/* allocate/trim descriptors as needed */
+	if (new_size > curr_size) {
+		/* copy current descriptors to the new ring */
+		for (i = 0; i < curr_size; i++) {
+			u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+			u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+			ring[new_idx] = ioat->ring[curr_idx];
+			set_desc_id(ring[new_idx], new_idx);
+		}
+
+		/* add new descriptors to the ring */
+		for (i = curr_size; i < new_size; i++) {
+			u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+			ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
+			if (!ring[new_idx]) {
+				/* unwind only what this loop allocated:
+				 * the slots below curr_size alias
+				 * descriptors the live ioat->ring still
+				 * points at and must not be freed
+				 */
+				while (i-- > curr_size) {
+					u16 free_idx = (ioat->tail+i) & (new_size-1);
+
+					ioat2_free_ring_ent(ring[free_idx], c);
+				}
+				kfree(ring);
+				return false;
+			}
+			set_desc_id(ring[new_idx], new_idx);
+		}
+
+		/* hw link new descriptors, starting from the last old one */
+		for (i = curr_size-1; i < new_size; i++) {
+			u16 new_idx = (ioat->tail+i) & (new_size-1);
+			struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
+			struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
+
+			hw->next = next->txd.phys;
+		}
+	} else {
+		struct ioat_dma_descriptor *hw;
+		struct ioat_ring_ent *next;
+
+		/* copy current descriptors to the new ring, dropping the
+		 * removed descriptors
+		 */
+		for (i = 0; i < new_size; i++) {
+			u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+			u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+			ring[new_idx] = ioat->ring[curr_idx];
+			set_desc_id(ring[new_idx], new_idx);
+		}
+
+		/* free deleted descriptors (looked up through the old ring) */
+		for (i = new_size; i < curr_size; i++) {
+			struct ioat_ring_ent *ent;
+
+			ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
+			ioat2_free_ring_ent(ent, c);
+		}
+
+		/* fix up hardware ring: close the circle at the new end */
+		hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
+		next = ring[(ioat->tail+new_size) & (new_size-1)];
+		hw->next = next->txd.phys;
+	}
+
+	dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+		__func__, new_size);
+
+	/* switch over to the new pointer array */
+	kfree(ioat->ring);
+	ioat->ring = ring;
+	ioat->alloc_order = order;
+
+	return true;
+}
+
+/**
+ * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops
+ * @idx: gets starting descriptor index on successful allocation
+ * @ioat: ioat2,3 channel (ring) to operate on
+ * @num_descs: allocation length
+ *
+ * Returns 0 with ioat->ring_lock held, or -ENOMEM with the lock released
+ * when the ring has no room and could not be grown.
+ */
+int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+
+	spin_lock_bh(&ioat->ring_lock);
+	/* never allow the last descriptor to be consumed, we need at
+	 * least one free at all times to allow for on-the-fly ring
+	 * resizing.
+	 */
+	while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
+		/* every pass through this body ends in break or return */
+		if (reshape_ring(ioat, ioat->alloc_order + 1) &&
+		    ioat2_ring_space(ioat) > num_descs)
+			break;
+
+		if (printk_ratelimit())
+			dev_dbg(to_dev(chan),
+				"%s: ring full! num_descs: %d (%x:%x:%x)\n",
+				__func__, num_descs, ioat->head, ioat->tail,
+				ioat->issued);
+		spin_unlock_bh(&ioat->ring_lock);
+
+		/* progress reclaim in the allocation failure case we
+		 * may be called under bh_disabled so we need to trigger
+		 * the timer event directly
+		 */
+		spin_lock_bh(&chan->cleanup_lock);
+		/* time_after() stays correct across a jiffies wrap, which
+		 * a plain '>' comparison does not
+		 */
+		if (time_after(jiffies, chan->timer.expires) &&
+		    timer_pending(&chan->timer)) {
+			struct ioatdma_device *device = chan->device;
+
+			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+			spin_unlock_bh(&chan->cleanup_lock);
+			device->timer_fn((unsigned long) ioat);
+		} else
+			spin_unlock_bh(&chan->cleanup_lock);
+		return -ENOMEM;
+	}
+
+	dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
+		__func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+
+	*idx = ioat2_desc_alloc(ioat, num_descs);
+	return 0;  /* with ioat->ring_lock held */
+}
+
+/* ioat2_dma_prep_memcpy_lock - build the descriptor chain for one memcpy
+ *
+ * The copy is split into chunks of at most 2^xfercap_log bytes, one ring
+ * descriptor per chunk.  The flags, the total length and the completion
+ * controls are recorded on the last descriptor only.
+ *
+ * Returns the txd of that last descriptor with ioat->ring_lock still held,
+ * or NULL if @len maps to zero descriptors or no ring space is available.
+ */
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+			   dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_dma_descriptor *hw;
+	struct ioat_ring_ent *desc;
+	const size_t total_len = len;
+	int num_descs;
+	int n = 0;
+	u16 idx;
+
+	num_descs = ioat2_xferlen_to_descs(ioat, len);
+	if (unlikely(!num_descs))
+		return NULL;
+	if (ioat2_alloc_and_lock(&idx, ioat, num_descs) != 0)
+		return NULL;
+
+	do {
+		size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+		desc = ioat2_get_ring_ent(ioat, idx + n);
+		hw = desc->hw;
+
+		hw->size = copy;
+		hw->ctl = 0;
+		hw->src_addr = dma_src;
+		hw->dst_addr = dma_dest;
+
+		len -= copy;
+		dma_dest += copy;
+		dma_src += copy;
+		dump_desc_dbg(ioat, desc);
+	} while (++n < num_descs);
+
+	/* desc and hw now refer to the final descriptor of the chain */
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+	hw->ctl_f.compl_write = 1;
+	dump_desc_dbg(ioat, desc);
+	/* we leave the channel locked to ensure in order submission */
+
+	return &desc->txd;
+}
+
+/**
+ * ioat2_free_chan_resources - release all the descriptors
+ * @c: the channel to be cleaned
+ *
+ * Does nothing if the channel has no ring.  Otherwise it stops the
+ * cleanup tasklet and timer, runs one last cleanup pass, resets the
+ * channel, then frees every ring entry (idle ones first, then any that
+ * are still in use), the ring array and the completion writeback area.
+ */
+void ioat2_free_chan_resources(struct dma_chan *c)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioatdma_device *device = chan->device;
+ struct ioat_ring_ent *desc;
+ const u16 total_descs = 1 << ioat->alloc_order;
+ int descs;
+ int i;
+
+ /* Before freeing channel resources first check
+ * if they have been previously allocated for this channel.
+ */
+ if (!ioat->ring)
+ return;
+
+ tasklet_disable(&chan->cleanup_task);
+ del_timer_sync(&chan->timer);
+ device->cleanup_tasklet((unsigned long) ioat); /* final cleanup pass, called directly */
+
+ /* Delay 100ms after reset to allow internal DMA logic to quiesce
+ * before removing DMA descriptor resources.
+ */
+ writeb(IOAT_CHANCMD_RESET,
+ chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+ mdelay(100);
+
+ spin_lock_bh(&ioat->ring_lock);
+ descs = ioat2_ring_space(ioat); /* entries not currently in use */
+ dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
+ for (i = 0; i < descs; i++) { /* idle entries start at head */
+ desc = ioat2_get_ring_ent(ioat, ioat->head + i);
+ ioat2_free_ring_ent(desc, c);
+ }
+
+ if (descs < total_descs)
+ dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+ total_descs - descs);
+
+ for (i = 0; i < total_descs - descs; i++) { /* in-use entries start at tail */
+ desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+ dump_desc_dbg(ioat, desc);
+ ioat2_free_ring_ent(desc, c);
+ }
+
+ kfree(ioat->ring);
+ ioat->ring = NULL; /* makes a later call return early */
+ ioat->alloc_order = 0;
+ pci_pool_free(device->completion_pool, chan->completion,
+ chan->completion_dma);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ chan->last_completion = 0;
+ chan->completion_dma = 0;
+ ioat->pending = 0;
+ ioat->dmacount = 0;
+}
+
+/* Report the status of @cookie.  If the first check does not say
+ * DMA_SUCCESS, run the device's cleanup routine once and check again.
+ */
+enum dma_status
+ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+		  dma_cookie_t *done, dma_cookie_t *used)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioatdma_device *device = ioat->base.device;
+	enum dma_status status;
+
+	status = ioat_is_complete(c, cookie, done, used);
+	if (status != DMA_SUCCESS) {
+		device->cleanup_tasklet((unsigned long) ioat);
+		status = ioat_is_complete(c, cookie, done, used);
+	}
+
+	return status;
+}
+
+/* sysfs 'ring_size': prints 2^alloc_order with bit 0 masked off */
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	const int entries = 1 << ioat->alloc_order;
+
+	return sprintf(page, "%d\n", entries & ~1);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+/* sysfs 'ring_active': prints the current ioat2_ring_active() count */
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+	/* ...taken outside the lock, no need to be precise */
+	const u16 active = ioat2_ring_active(to_ioat2_chan(c));
+
+	return sprintf(page, "%d\n", active);
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static struct attribute *ioat2_attrs[] = { /* attribute list used as ioat2_ktype.default_attrs */
+ &ring_size_attr.attr, /* declared next to ring_size_show() */
+ &ring_active_attr.attr, /* declared next to ring_active_show() */
+ &ioat_cap_attr.attr, /* defined outside this section */
+ &ioat_version_attr.attr, /* defined outside this section */
+ NULL, /* list terminator */
+};
+
+struct kobj_type ioat2_ktype = { /* passed to ioat_kobject_add() by ioat2_dma_probe() */
+ .sysfs_ops = &ioat_sysfs_ops,
+ .default_attrs = ioat2_attrs, /* ring_size, ring_active, cap, version */
+};
+
+/* ioat2_dma_probe - install the ioat2 operations on @device and register it
+ * @device: device to set up
+ * @dca: when non-zero, also initialize direct cache access support
+ *
+ * Returns 0 on success or the error reported by ioat_probe() or
+ * ioat_register().
+ */
+int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
+{
+	struct pci_dev *pdev = device->pdev;
+	struct dma_device *dma = &device->common;
+	struct dma_chan *c;
+	int err;
+
+	/* driver-internal hooks */
+	device->enumerate_channels = ioat2_enumerate_channels;
+	device->cleanup_tasklet = ioat2_cleanup_tasklet;
+	device->timer_fn = ioat2_timer_event;
+	device->self_test = ioat_dma_self_test;
+
+	/* dmaengine operations */
+	dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+	dma->device_issue_pending = ioat2_issue_pending;
+	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+	dma->device_free_chan_resources = ioat2_free_chan_resources;
+	dma->device_is_tx_complete = ioat2_is_complete;
+
+	err = ioat_probe(device);
+	if (err)
+		return err;
+	ioat_set_tcp_copy_break(2048);
+
+	/* program the DCA control register of every enumerated channel */
+	list_for_each_entry(c, &dma->channels, device_node) {
+		struct ioat_chan_common *chan = to_chan_common(c);
+
+		writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
+		       chan->reg_base + IOAT_DCACTRL_OFFSET);
+	}
+
+	err = ioat_register(device);
+	if (err)
+		return err;
+
+	ioat_kobject_add(device, &ioat2_ktype);
+
+	if (dca)
+		device->dca = ioat2_dca_init(pdev, device->reg_base);
+
+	return 0;
+}
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
new file mode 100644
index 00000000000..1d849ef74d5
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_V2_H
+#define IOATDMA_V2_H
+
+#include <linux/dmaengine.h>
+#include "dma.h"
+#include "hw.h"
+
+
+extern int ioat_pending_level;
+extern int ioat_ring_alloc_order;
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+#define IOAT_MAX_ORDER 16 /* cap applied to both ring order settings below */
+#define ioat_get_alloc_order() \
+ (min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
+#define ioat_get_max_alloc_order() \
+ (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) /* NOTE(review): ioat_ring_max_alloc_order has no extern declaration in this header - users must have it in scope; confirm */
+
+/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes
+ * @base: common ioat channel parameters
+ * @xfercap_log: log2 of channel max transfer length (for fast division)
+ * @head: allocated index
+ * @issued: hardware notification point
+ * @tail: cleanup index
+ * @pending: lock free indicator for issued != head
+ * @dmacount: identical to 'head' except for occasionally resetting to zero
+ * @alloc_order: log2 of the number of allocated descriptors
+ * @ring: software ring buffer implementation of hardware ring
+ * @ring_lock: protects ring attributes
+ *
+ * @head, @issued and @tail are 16-bit counters; the helpers below mask
+ * them with ioat2_ring_mask() whenever they are used as ring positions.
+ */
+struct ioat2_dma_chan {
+ struct ioat_chan_common base;
+ size_t xfercap_log;
+ u16 head;
+ u16 issued;
+ u16 tail;
+ u16 dmacount;
+ u16 alloc_order;
+ int pending;
+ struct ioat_ring_ent **ring;
+ spinlock_t ring_lock;
+};
+
+/* map a generic dma_chan to the ioat2 channel whose 'base' it belongs to */
+static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c)
+{
+	return container_of(to_chan_common(c), struct ioat2_dma_chan, base);
+}
+
+/* index mask for the ring: number of entries (2^alloc_order) minus one */
+static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat)
+{
+	const int entries = 1 << ioat->alloc_order;
+
+	return entries - 1;
+}
+
+/* number of descriptors between tail and head, i.e. in flight with the engine */
+static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat)
+{
+	const u16 mask = ioat2_ring_mask(ioat);
+
+	return (ioat->head - ioat->tail) & mask;
+}
+
+/* number of descriptors between issued and head, i.e. not yet submitted to hardware */
+static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat)
+{
+	const u16 mask = ioat2_ring_mask(ioat);
+
+	return (ioat->head - ioat->issued) & mask;
+}
+
+/* number of free ring entries; BUGs if the active count exceeds the ring size */
+static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat)
+{
+	const u16 size = ioat2_ring_mask(ioat) + 1;
+	const u16 in_flight = ioat2_ring_active(ioat);
+
+	BUG_ON(in_flight > size);
+
+	return size - in_flight;
+}
+
+/* claim @len descriptors by advancing head and return the index of the
+ * first one; assumes caller already checked space
+ */
+static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len)
+{
+	const u16 first = ioat->head;
+
+	ioat->head = first + len;
+	return first;
+}
+
+/* number of descriptors needed for @len bytes: len / 2^xfercap_log, rounded up */
+static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len)
+{
+	const size_t partial = len & ((1 << ioat->xfercap_log) - 1);
+	u16 num_descs = len >> ioat->xfercap_log;
+
+	/* one extra descriptor for a trailing partial chunk */
+	if (partial)
+		num_descs++;
+	return num_descs;
+}
+
+/**
+ * struct ioat_ring_ent - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @fill: hardware fill descriptor
+ * @xor: hardware xor descriptor
+ * @xor_ex: hardware xor extension descriptor
+ * @pq: hardware pq descriptor
+ * @pq_ex: hardware pq extension descriptor
+ * @pqu: hardware pq update descriptor
+ * @raw: hardware raw (un-typed) descriptor
+ * @txd: the generic software descriptor for all engines
+ * @len: total transaction length for unmap
+ * @result: asynchronous result of validate operations
+ * @id: identifier for debug
+ *
+ * The descriptor pointers (@hw ... @raw) are members of one anonymous
+ * union, i.e. differently typed views of a single pointer.  @id only
+ * exists when DEBUG is defined.
+ */
+
+struct ioat_ring_ent {
+ union {
+ struct ioat_dma_descriptor *hw;
+ struct ioat_fill_descriptor *fill;
+ struct ioat_xor_descriptor *xor;
+ struct ioat_xor_ext_descriptor *xor_ex;
+ struct ioat_pq_descriptor *pq;
+ struct ioat_pq_ext_descriptor *pq_ex;
+ struct ioat_pq_update_descriptor *pqu;
+ struct ioat_raw_descriptor *raw;
+ };
+ size_t len;
+ struct dma_async_tx_descriptor txd;
+ enum sum_check_flags *result;
+ #ifdef DEBUG
+ int id;
+ #endif
+};
+
+/* look up the ring entry for @idx; the index is wrapped with the ring mask */
+static inline struct ioat_ring_ent *
+ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx)
+{
+	const u16 slot = idx & ioat2_ring_mask(ioat);
+
+	return ioat->ring[slot];
+}
+
+/* program the 64-bit chain address with two 32-bit writes, low half first */
+static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	const u32 lo = addr & 0x00000000FFFFFFFF;
+	const u32 hi = addr >> 32;
+
+	writel(lo, chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+	writel(hi, chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+}
+
+int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
+struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
+int ioat2_enumerate_channels(struct ioatdma_device *device);
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags);
+void ioat2_issue_pending(struct dma_chan *chan);
+int ioat2_alloc_chan_resources(struct dma_chan *c);
+void ioat2_free_chan_resources(struct dma_chan *c);
+enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used);
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
+void ioat2_cleanup_tasklet(unsigned long data);
+void ioat2_timer_event(unsigned long data);
+extern struct kobj_type ioat2_ktype;
+extern struct kmem_cache *ioat2_cache;
+#endif /* IOATDMA_V2_H */
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
new file mode 100644
index 00000000000..35d1e33afd5
--- /dev/null
+++ b/drivers/dma/ioat/dma_v3.c
@@ -0,0 +1,1223 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Support routines for v3+ hardware
+ */
+
+#include <linux/pci.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include "registers.h"
+#include "hw.h"
+#include "dma.h"
+#include "dma_v2.h"
+
+/* ioat hardware assumes at least two sources for raid operations, so the
+ * count stored in a descriptor is the software source count minus two
+ */
+#define src_cnt_to_sw(x) ((x) + 2)
+#define src_cnt_to_hw(x) ((x) - 2)
+
+/* provide a lookup table for setting the source address in the base or
+ * extended descriptor of an xor or pq descriptor
+ *
+ * *_idx_to_desc is a bitmap indexed by source number: a clear bit selects
+ * the base descriptor (descs[0]), a set bit the extended one (descs[1]).
+ * *_idx_to_field gives the raw field index inside the selected descriptor.
+ *
+ * xor: sources 0-4 live in the base descriptor and 5-7 in the extension,
+ * matching the "src_cnt > 5" rule used to decide whether an extension
+ * exists, hence 0xe0 (bits 5-7).  pq: sources 0-2 are in the base
+ * descriptor and 3-7 in the extension, hence 0xf8.
+ *
+ * The tables are const, so they carry no __read_mostly annotation.
+ */
+static const u8 xor_idx_to_desc = 0xe0;
+static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc = 0xf8;
+static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
+
+/* read xor source @idx from the base (descs[0]) or extended (descs[1]) descriptor */
+static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+	const int which = (xor_idx_to_desc >> idx) & 1;
+
+	return descs[which]->field[xor_idx_to_field[idx]];
+}
+
+/* store @addr + @offset as xor source @idx in the base or extended descriptor */
+static void xor_set_src(struct ioat_raw_descriptor *descs[2],
+			dma_addr_t addr, u32 offset, int idx)
+{
+	const int which = (xor_idx_to_desc >> idx) & 1;
+
+	descs[which]->field[xor_idx_to_field[idx]] = addr + offset;
+}
+
+/* read pq source @idx from the base (descs[0]) or extended (descs[1]) descriptor */
+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+	const int which = (pq_idx_to_desc >> idx) & 1;
+
+	return descs[which]->field[pq_idx_to_field[idx]];
+}
+
+/* store @addr + @offset as pq source @idx in the base or extended
+ * descriptor and record its coefficient, which always lives in the base
+ * descriptor
+ */
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+		       dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+	const int which = (pq_idx_to_desc >> idx) & 1;
+
+	descs[which]->field[pq_idx_to_field[idx]] = addr + offset;
+	pq->coef[idx] = coef;
+}
+
+/* ioat3_dma_unmap - dma-unmap the buffers referenced by a descriptor
+ * @ioat: channel the descriptor belongs to
+ * @desc: ring entry holding the base descriptor of the operation
+ * @idx: ring index of @desc; idx + 1 is where an extended descriptor lives
+ *
+ * Every address taken from the descriptor has 'offset' (desc->len minus
+ * the descriptor's own size) subtracted before it is unmapped with the
+ * full transaction length.  Which buffers are unmapped, and in which
+ * direction, depends on the operation type and on the txd flags.
+ */
+static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
+			    struct ioat_ring_ent *desc, int idx)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	struct pci_dev *pdev = chan->device->pdev;
+	size_t len = desc->len;
+	size_t offset = len - desc->hw->size;
+	struct dma_async_tx_descriptor *tx = &desc->txd;
+	enum dma_ctrl_flags flags = tx->flags;
+
+	switch (desc->hw->ctl_f.op) {
+	case IOAT_OP_COPY:
+		if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
+			ioat_dma_unmap(chan, flags, len, desc->hw);
+		break;
+	case IOAT_OP_FILL: {
+		struct ioat_fill_descriptor *hw = desc->fill;
+
+		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+			ioat_unmap(pdev, hw->dst_addr - offset, len,
+				   PCI_DMA_FROMDEVICE, flags, 1);
+		break;
+	}
+	case IOAT_OP_XOR_VAL:
+	case IOAT_OP_XOR: {
+		struct ioat_xor_descriptor *xor = desc->xor;
+		struct ioat_ring_ent *ext;
+		struct ioat_xor_ext_descriptor *xor_ex = NULL;
+		int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
+		struct ioat_raw_descriptor *descs[2];
+		int i;
+
+		/* more than five sources spill into an extended descriptor */
+		if (src_cnt > 5) {
+			ext = ioat2_get_ring_ent(ioat, idx + 1);
+			xor_ex = ext->xor_ex;
+		}
+
+		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			descs[0] = (struct ioat_raw_descriptor *) xor;
+			descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+			for (i = 0; i < src_cnt; i++) {
+				dma_addr_t src = xor_get_src(descs, i);
+
+				ioat_unmap(pdev, src - offset, len,
+					   PCI_DMA_TODEVICE, flags, 0);
+			}
+
+			/* dest is a source in xor validate operations */
+			if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
+				ioat_unmap(pdev, xor->dst_addr - offset, len,
+					   PCI_DMA_TODEVICE, flags, 1);
+				break;
+			}
+		}
+
+		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+			ioat_unmap(pdev, xor->dst_addr - offset, len,
+				   PCI_DMA_FROMDEVICE, flags, 1);
+		break;
+	}
+	case IOAT_OP_PQ_VAL:
+	case IOAT_OP_PQ: {
+		struct ioat_pq_descriptor *pq = desc->pq;
+		struct ioat_ring_ent *ext;
+		struct ioat_pq_ext_descriptor *pq_ex = NULL;
+		int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+		struct ioat_raw_descriptor *descs[2];
+		int i;
+
+		/* more than three sources spill into an extended descriptor */
+		if (src_cnt > 3) {
+			ext = ioat2_get_ring_ent(ioat, idx + 1);
+			pq_ex = ext->pq_ex;
+		}
+
+		/* in the 'continue' case don't unmap the dests as sources */
+		if (dmaf_p_disabled_continue(flags))
+			src_cnt--;
+		else if (dmaf_continue(flags))
+			src_cnt -= 3;
+
+		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			descs[0] = (struct ioat_raw_descriptor *) pq;
+			descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+			for (i = 0; i < src_cnt; i++) {
+				dma_addr_t src = pq_get_src(descs, i);
+
+				ioat_unmap(pdev, src - offset, len,
+					   PCI_DMA_TODEVICE, flags, 0);
+			}
+
+			/* the dests are sources in pq validate operations;
+			 * the op to test for here is PQ_VAL (an XOR_VAL
+			 * check can never match inside the pq cases)
+			 */
+			if (pq->ctl_f.op == IOAT_OP_PQ_VAL) {
+				if (!(flags & DMA_PREP_PQ_DISABLE_P))
+					ioat_unmap(pdev, pq->p_addr - offset,
+						   len, PCI_DMA_TODEVICE, flags, 0);
+				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+					ioat_unmap(pdev, pq->q_addr - offset,
+						   len, PCI_DMA_TODEVICE, flags, 0);
+				break;
+			}
+		}
+
+		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+			if (!(flags & DMA_PREP_PQ_DISABLE_P))
+				ioat_unmap(pdev, pq->p_addr - offset, len,
+					   PCI_DMA_BIDIRECTIONAL, flags, 1);
+			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+				ioat_unmap(pdev, pq->q_addr - offset, len,
+					   PCI_DMA_BIDIRECTIONAL, flags, 1);
+		}
+		break;
+	}
+	default:
+		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
+			__func__, desc->hw->ctl_f.op);
+	}
+}
+
+/* Does @desc use a following extended descriptor?  True for xor
+ * operations with more than five sources and for pq operations with more
+ * than three.
+ */
+static bool desc_has_ext(struct ioat_ring_ent *desc)
+{
+	switch (desc->hw->ctl_f.op) {
+	case IOAT_OP_XOR:
+	case IOAT_OP_XOR_VAL:
+		return src_cnt_to_sw(desc->xor->ctl_f.src_cnt) > 5;
+	case IOAT_OP_PQ:
+	case IOAT_OP_PQ_VAL:
+		return src_cnt_to_sw(desc->pq->ctl_f.src_cnt) > 3;
+	default:
+		return false;
+	}
+}
+
+/**
+ * __cleanup - reclaim used descriptors
+ * @ioat: channel (ring) to clean
+ * @phys_complete: descriptor address at which reclaim stops; the callers
+ *	in this file obtain it from ioat_cleanup_preamble()
+ *
+ * The difference from the dma_v2.c __cleanup() is that this routine
+ * handles extended descriptors and dma-unmapping raid operations.
+ *
+ * Walks the active descriptors from tail, completing each one that still
+ * carries a cookie (unmap, then callback), until the descriptor whose
+ * address equals @phys_complete has been processed.  BUGs if no active
+ * descriptor matches.  When the ring ends up empty the completion-pending
+ * bit is cleared and the timer is re-armed with IDLE_TIMEOUT.
+ */
+static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_ring_ent *desc;
+ bool seen_current = false;
+ u16 active;
+ int i;
+
+ dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued);
+
+ active = ioat2_ring_active(ioat);
+ for (i = 0; i < active && !seen_current; i++) {
+ struct dma_async_tx_descriptor *tx;
+
+ prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
+ desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+ dump_desc_dbg(ioat, desc);
+ tx = &desc->txd;
+ if (tx->cookie) { /* a zero cookie means nothing to complete for this entry */
+ chan->completed_cookie = tx->cookie;
+ ioat3_dma_unmap(ioat, desc, ioat->tail + i);
+ tx->cookie = 0;
+ if (tx->callback) {
+ tx->callback(tx->callback_param);
+ tx->callback = NULL; /* never invoke it twice */
+ }
+ }
+
+ if (tx->phys == phys_complete)
+ seen_current = true;
+
+ /* skip extended descriptors */
+ if (desc_has_ext(desc)) {
+ BUG_ON(i + 1 >= active); /* the extension must be part of the active set */
+ i++;
+ }
+ }
+ ioat->tail += i; /* everything walked above is reclaimed */
+ BUG_ON(!seen_current); /* no active descs have written a completion? */
+ chan->last_completion = phys_complete;
+ if (ioat->head == ioat->tail) {
+ dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+ __func__);
+ clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+}
+
+/* Opportunistic cleanup: both locks are only try-locked (cleanup_lock
+ * first, then ring_lock) and the function returns without doing anything
+ * if either is contended or if ioat_cleanup_preamble() returns false.
+ */
+static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	unsigned long phys_complete;
+
+	prefetch(chan->completion);
+
+	if (!spin_trylock_bh(&chan->cleanup_lock))
+		return;
+
+	if (!ioat_cleanup_preamble(chan, &phys_complete))
+		goto out_unlock_cleanup;
+
+	if (!spin_trylock_bh(&ioat->ring_lock))
+		goto out_unlock_cleanup;
+
+	__cleanup(ioat, phys_complete);
+
+	spin_unlock_bh(&ioat->ring_lock);
+out_unlock_cleanup:
+	spin_unlock_bh(&chan->cleanup_lock);
+}
+
+/* tasklet body: run a cleanup pass, then rewrite the channel control
+ * register with RUN and COMPL_DCA_EN; @data is the ioat2_dma_chan pointer
+ */
+static void ioat3_cleanup_tasklet(unsigned long data)
+{
+	struct ioat2_dma_chan *ioat = (struct ioat2_dma_chan *) data;
+	struct ioat_chan_common *chan = &ioat->base;
+
+	ioat3_cleanup(ioat);
+	writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN,
+	       chan->reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+/* Suspend the channel if it is active or idle and spin until it reports
+ * neither state, reclaim whatever has completed, then restart the channel
+ * through __ioat2_restart_chan().
+ */
+static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	unsigned long phys_complete;
+	u32 status;
+
+	status = ioat_chansts(chan);
+	if (is_ioat_active(status) || is_ioat_idle(status)) {
+		ioat_suspend(chan);
+		/* poll until the suspend has taken effect */
+		do {
+			status = ioat_chansts(chan);
+			cpu_relax();
+		} while (is_ioat_active(status) || is_ioat_idle(status));
+	}
+
+	if (ioat_cleanup_preamble(chan, &phys_complete))
+		__cleanup(ioat, phys_complete);
+
+	__ioat2_restart_chan(ioat);
+}
+/* timer callback: chase a pending completion, or shrink the ring when nothing is pending */
+static void ioat3_timer_event(unsigned long data)
+{
+ struct ioat2_dma_chan *ioat = (void *) data; /* @data carries the channel pointer */
+ struct ioat_chan_common *chan = &ioat->base;
+
+ spin_lock_bh(&chan->cleanup_lock);
+ if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+ unsigned long phys_complete;
+ u64 status;
+
+ spin_lock_bh(&ioat->ring_lock);
+ status = ioat_chansts(chan);
+
+ /* when halted due to errors check for channel
+ * programming errors before advancing the completion state
+ */
+ if (is_ioat_halted(status)) {
+ u32 chanerr;
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ BUG_ON(is_ioat_bug(chanerr));
+ }
+
+ /* if we haven't made progress and we have already
+ * acknowledged a pending completion once, then be more
+ * forceful with a restart
+ */
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+ else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+ ioat3_restart_channel(ioat); /* second timeout without progress */
+ else {
+ set_bit(IOAT_COMPLETION_ACK, &chan->state); /* first timeout: note it and re-arm */
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ }
+ spin_unlock_bh(&ioat->ring_lock);
+ } else {
+ u16 active;
+
+ /* if the ring is idle, empty, and oversized try to step
+ * down the size
+ */
+ spin_lock_bh(&ioat->ring_lock);
+ active = ioat2_ring_active(ioat);
+ if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+ reshape_ring(ioat, ioat->alloc_order-1); /* one order smaller per timer tick */
+ spin_unlock_bh(&ioat->ring_lock);
+
+ /* keep shrinking until we get back to our minimum
+ * default size
+ */
+ if (ioat->alloc_order > ioat_get_alloc_order())
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+/* report the status of @cookie; if not yet DMA_SUCCESS, run ioat3_cleanup() once and re-check */
+static enum dma_status
+ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+ if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+ return DMA_SUCCESS; /* fast path: no cleanup needed */
+
+ ioat3_cleanup(ioat);
+
+ return ioat_is_complete(c, cookie, done, used);
+}
+/* prepare FILL descriptors writing the low byte of @value over @len bytes at @dest; NULL on failure */
+static struct dma_async_tx_descriptor *
+ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_ring_ent *desc;
+ size_t total_len = len;
+ struct ioat_fill_descriptor *fill;
+ int num_descs;
+ u64 src_data = (0x0101010101010101ULL) * (value & 0xff); /* replicate the byte into all 8 lanes */
+ u16 idx;
+ int i;
+
+ num_descs = ioat2_xferlen_to_descs(ioat, len);
+ if (likely(num_descs) &&
+ ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
+ /* pass */;
+ else
+ return NULL; /* zero descriptors needed, or ioat2_alloc_and_lock() failed */
+ i = 0;
+ do {
+ size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); /* cap each descriptor's size */
+
+ desc = ioat2_get_ring_ent(ioat, idx + i);
+ fill = desc->fill;
+
+ fill->size = xfer_size;
+ fill->src_data = src_data;
+ fill->dst_addr = dest;
+ fill->ctl = 0;
+ fill->ctl_f.op = IOAT_OP_FILL;
+
+ len -= xfer_size;
+ dest += xfer_size;
+ dump_desc_dbg(ioat, desc);
+ } while (++i < num_descs);
+
+ desc->txd.flags = flags; /* desc/fill now refer to the last descriptor of the chain */
+ desc->len = total_len;
+ fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ fill->ctl_f.compl_write = 1;
+ dump_desc_dbg(ioat, desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd;
+}
+/* prepare xor (xor-validate when @result is set) descriptors plus a trailing null descriptor */
+static struct dma_async_tx_descriptor *
+__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
+ dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
+ size_t len, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_ring_ent *compl_desc;
+ struct ioat_ring_ent *desc;
+ struct ioat_ring_ent *ext;
+ size_t total_len = len;
+ struct ioat_xor_descriptor *xor;
+ struct ioat_xor_ext_descriptor *xor_ex = NULL;
+ struct ioat_dma_descriptor *hw;
+ u32 offset = 0;
+ int num_descs;
+ int with_ext;
+ int i;
+ u16 idx;
+ u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; /* a result pointer selects the validate opcode */
+
+ BUG_ON(src_cnt < 2);
+
+ num_descs = ioat2_xferlen_to_descs(ioat, len);
+ /* we need 2x the number of descriptors to cover greater than 5
+ * sources
+ */
+ if (src_cnt > 5) {
+ with_ext = 1;
+ num_descs *= 2;
+ } else
+ with_ext = 0;
+
+ /* completion writes from the raid engine may pass completion
+ * writes from the legacy engine, so we need one extra null
+ * (legacy) descriptor to ensure all completion writes arrive in
+ * order.
+ */
+ if (likely(num_descs) &&
+ ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+ /* pass */;
+ else
+ return NULL; /* zero descriptors needed, or ioat2_alloc_and_lock() failed */
+ i = 0;
+ do {
+ struct ioat_raw_descriptor *descs[2];
+ size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+ int s;
+
+ desc = ioat2_get_ring_ent(ioat, idx + i);
+ xor = desc->xor;
+
+ /* save a branch by unconditionally retrieving the
+ * extended descriptor; xor_set_src() knows not to write
+ * to it in the single descriptor case
+ */
+ ext = ioat2_get_ring_ent(ioat, idx + i + 1);
+ xor_ex = ext->xor_ex;
+
+ descs[0] = (struct ioat_raw_descriptor *) xor;
+ descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+ for (s = 0; s < src_cnt; s++)
+ xor_set_src(descs, src[s], offset, s);
+ xor->size = xfer_size;
+ xor->dst_addr = dest + offset;
+ xor->ctl = 0;
+ xor->ctl_f.op = op;
+ xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
+
+ len -= xfer_size;
+ offset += xfer_size;
+ dump_desc_dbg(ioat, desc);
+ } while ((i += 1 + with_ext) < num_descs); /* step over the extended slot when one is used */
+
+ /* last xor descriptor carries the unmap parameters and fence bit */
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ if (result)
+ desc->result = result;
+ xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+ /* completion descriptor carries interrupt bit */
+ compl_desc = ioat2_get_ring_ent(ioat, idx + i); /* the extra (+1) slot allocated above */
+ compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+ hw = compl_desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ dump_desc_dbg(ioat, compl_desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd; /* txd of the last xor descriptor, not of the null descriptor */
+}
+/* xor entry point: forwards to __ioat3_prep_xor_lock() with no validate result pointer */
+static struct dma_async_tx_descriptor *
+ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
+}
+/* xor-validate entry point: src[0] is passed as the destination, src[1..] as the sources */
+struct dma_async_tx_descriptor *
+ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+ unsigned int src_cnt, size_t len,
+ enum sum_check_flags *result, unsigned long flags)
+{
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *result = 0;
+
+ return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
+ src_cnt - 1, len, flags);
+}
+/* debug helper: dev_dbg() dump of one pq descriptor (and its optional extension @ext) */
+static void
+dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
+{
+ struct device *dev = to_dev(&ioat->base);
+ struct ioat_pq_descriptor *pq = desc->pq;
+ struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
+ struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
+ int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); /* converted count, used for the loop below */
+ int i;
+
+ dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+ " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
+ desc_id(desc), (unsigned long long) desc->txd.phys,
+ (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
+ desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
+ pq->ctl_f.compl_write,
+ pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+ pq->ctl_f.src_cnt); /* NOTE(review): prints the raw hardware field, not the converted src_cnt */
+ for (i = 0; i < src_cnt; i++)
+ dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+ (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
+ dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+ dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+}
+/* prepare pq (pq-validate when @result is set) descriptors plus a trailing null descriptor */
+static struct dma_async_tx_descriptor *
+__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
+ const dma_addr_t *dst, const dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_ring_ent *compl_desc;
+ struct ioat_ring_ent *desc;
+ struct ioat_ring_ent *ext;
+ size_t total_len = len;
+ struct ioat_pq_descriptor *pq;
+ struct ioat_pq_ext_descriptor *pq_ex = NULL;
+ struct ioat_dma_descriptor *hw;
+ u32 offset = 0;
+ int num_descs;
+ int with_ext;
+ int i, s;
+ u16 idx;
+ u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; /* a result pointer selects the validate opcode */
+
+ dev_dbg(to_dev(chan), "%s\n", __func__);
+ /* the engine requires at least two sources (we provide
+ * at least 1 implied source in the DMA_PREP_CONTINUE case)
+ */
+ BUG_ON(src_cnt + dmaf_continue(flags) < 2);
+
+ num_descs = ioat2_xferlen_to_descs(ioat, len);
+ /* we need 2x the number of descriptors to cover greater than 3
+ * sources
+ */
+ if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
+ with_ext = 1;
+ num_descs *= 2;
+ } else
+ with_ext = 0;
+
+ /* completion writes from the raid engine may pass completion
+ * writes from the legacy engine, so we need one extra null
+ * (legacy) descriptor to ensure all completion writes arrive in
+ * order.
+ */
+ if (likely(num_descs) &&
+ ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+ /* pass */;
+ else
+ return NULL; /* zero descriptors needed, or ioat2_alloc_and_lock() failed */
+ i = 0;
+ do {
+ struct ioat_raw_descriptor *descs[2];
+ size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+ desc = ioat2_get_ring_ent(ioat, idx + i);
+ pq = desc->pq;
+
+ /* save a branch by unconditionally retrieving the
+ * extended descriptor; pq_set_src() knows not to write
+ * to it in the single descriptor case
+ */
+ ext = ioat2_get_ring_ent(ioat, idx + i + with_ext); /* equals desc when with_ext is 0 */
+ pq_ex = ext->pq_ex;
+
+ descs[0] = (struct ioat_raw_descriptor *) pq;
+ descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+
+ for (s = 0; s < src_cnt; s++)
+ pq_set_src(descs, src[s], offset, scf[s], s);
+
+ /* see the comment for dma_maxpq in include/linux/dmaengine.h */
+ if (dmaf_p_disabled_continue(flags))
+ pq_set_src(descs, dst[1], offset, 1, s++); /* one implied source: dst[1], coefficient 1 */
+ else if (dmaf_continue(flags)) {
+ pq_set_src(descs, dst[0], offset, 0, s++); /* three implied sources follow */
+ pq_set_src(descs, dst[1], offset, 1, s++);
+ pq_set_src(descs, dst[1], offset, 0, s++);
+ }
+ pq->size = xfer_size;
+ pq->p_addr = dst[0] + offset;
+ pq->q_addr = dst[1] + offset;
+ pq->ctl = 0;
+ pq->ctl_f.op = op;
+ pq->ctl_f.src_cnt = src_cnt_to_hw(s); /* s counts the implied sources too */
+ pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+ pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+ len -= xfer_size;
+ offset += xfer_size;
+ } while ((i += 1 + with_ext) < num_descs); /* step over the extended slot when one is used */
+
+ /* last pq descriptor carries the unmap parameters and fence bit */
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ if (result)
+ desc->result = result;
+ pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ dump_pq_desc_dbg(ioat, desc, ext);
+
+ /* completion descriptor carries interrupt bit */
+ compl_desc = ioat2_get_ring_ent(ioat, idx + i); /* the extra (+1) slot allocated above */
+ compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+ hw = compl_desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ dump_desc_dbg(ioat, compl_desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd; /* txd of the last pq descriptor, not of the null descriptor */
+}
+/* pq entry point; pads a lone source to two so the two-source minimum is met */
+static struct dma_async_tx_descriptor *
+ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ /* handle the single source multiply case from the raid6
+ * recovery path
+ */
+ if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) {
+ dma_addr_t single_source[2];
+ unsigned char single_source_coef[2];
+
+ BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
+ single_source[0] = src[0];
+ single_source[1] = src[0]; /* same page again, paired with a zero coefficient below */
+ single_source_coef[0] = scf[0];
+ single_source_coef[1] = 0;
+
+ return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
+ single_source_coef, len, flags);
+ } else
+ return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
+ len, flags);
+}
+/* pq-validate entry point: clears *pqres, then forwards it as the result pointer */
+struct dma_async_tx_descriptor *
+ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ enum sum_check_flags *pqres, unsigned long flags)
+{
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *pqres = 0;
+
+ return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+ flags);
+}
+/* xor built on the pq path: Q is disabled and every coefficient is zeroed */
+static struct dma_async_tx_descriptor *
+ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ unsigned char scf[src_cnt]; /* variable-length array sized by the caller's src_cnt */
+ dma_addr_t pq[2];
+
+ memset(scf, 0, src_cnt);
+ flags |= DMA_PREP_PQ_DISABLE_Q;
+ pq[0] = dst;
+ pq[1] = ~0; /* presumably a placeholder, since Q is disabled above */
+
+ return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
+ flags);
+}
+/* xor-validate built on the pq path: src[0] becomes P, src[1..] the sources, Q is disabled */
+struct dma_async_tx_descriptor *
+ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+ unsigned int src_cnt, size_t len,
+ enum sum_check_flags *result, unsigned long flags)
+{
+ unsigned char scf[src_cnt]; /* variable-length array; only src_cnt - 1 entries are consumed */
+ dma_addr_t pq[2];
+
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *result = 0;
+
+ memset(scf, 0, src_cnt);
+ flags |= DMA_PREP_PQ_DISABLE_Q;
+ pq[0] = src[0];
+ pq[1] = ~0; /* presumably a placeholder, since Q is disabled above */
+
+ return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
+ len, flags);
+}
+/* prepare a single null descriptor with interrupt and completion write enabled; NULL on failure */
+static struct dma_async_tx_descriptor *
+ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_ring_ent *desc;
+ struct ioat_dma_descriptor *hw;
+ u16 idx;
+
+ if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
+ desc = ioat2_get_ring_ent(ioat, idx);
+ else
+ return NULL; /* ioat2_alloc_and_lock() failed */
+
+ hw = desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = 1; /* always on here, regardless of DMA_PREP_INTERRUPT in @flags */
+ hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ hw->src_addr = 0;
+ hw->dst_addr = 0;
+
+ desc->txd.flags = flags;
+ desc->len = 1;
+
+ dump_desc_dbg(ioat, desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd;
+}
+/* self-test completion callback: signals the struct completion passed as @dma_async_param */
+static void __devinit ioat3_dma_test_callback(void *dma_async_param)
+{
+ struct completion *cmp = dma_async_param;
+
+ complete(cmp);
+}
+/* self test of xor, xor-validate and memset on the first channel; returns 0 or a negative errno */
+#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
+static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[IOAT_NUM_SRC_TEST];
+ struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
+ dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
+ dma_addr_t dma_addr, dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ u32 xor_val_result;
+ int err = 0;
+ struct completion cmp;
+ unsigned long tmo;
+ struct device *dev = &device->pdev->dev;
+ struct dma_device *dma = &device->common;
+
+ dev_dbg(dev, "%s\n", __func__);
+
+ if (!dma_has_cap(DMA_XOR, dma->cap_mask))
+ return 0; /* nothing to test without xor support */
+
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx]) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx); /* each source page sets a distinct bit in every byte */
+ }
+
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx); /* expected xor of all source bytes */
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(dma->channels.next, struct dma_chan,
+ device_node);
+ if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ IOAT_NUM_SRC_TEST, PAGE_SIZE,
+ DMA_PREP_INTERRUPT);
+
+ if (!tx) {
+ dev_err(dev, "Self-test xor prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test xor setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test xor timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_err(dev, "Self-test xor failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+ dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); /* must match the mapping direction */
+
+ /* skip validate if the capability is not present */
+ if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ /* validate the sources with the destination page */
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ xor_val_srcs[i] = xor_srcs[i];
+ xor_val_srcs[i] = dest;
+
+ xor_val_result = 1; /* start non-zero so a pass requires the result to be cleared */
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &xor_val_result, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test zero prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test zero setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test validate timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (xor_val_result != 0) {
+ dev_err(dev, "Self-test validate failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* skip memset if the capability is not present */
+ if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ /* test memset */
+ dma_addr = dma_map_page(dev, dest, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
+ DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test memset prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test memset setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test memset timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i]) {
+ dev_err(dev, "Self-test memset failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+ /* test for non-zero parity sum */
+ xor_val_result = 0; /* dest is now zeroed, so validation is expected to flag a mismatch */
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &xor_val_result, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test 2nd zero prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test 2nd zero setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test 2nd validate timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (xor_val_result != SUM_CHECK_P_RESULT) {
+ dev_err(dev, "Self-test validate failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ dma->device_free_chan_resources(dma_chan);
+out:
+ src_idx = IOAT_NUM_SRC_TEST; /* every source page was allocated by the time we get here */
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+/* run ioat_dma_self_test(), then the xor/validate self test; returns the first non-zero result */
+static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
+{
+ int rc = ioat_dma_self_test(device);
+
+ if (rc)
+ return rc;
+
+ rc = ioat_xor_val_self_test(device);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+/* install v3 operations according to the capability register, apply workarounds, register the device */
+int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
+{
+ struct pci_dev *pdev = device->pdev;
+ struct dma_device *dma;
+ struct dma_chan *c;
+ struct ioat_chan_common *chan;
+ bool is_raid_device = false;
+ int err;
+ u16 dev_id;
+ u32 cap;
+
+ device->enumerate_channels = ioat2_enumerate_channels;
+ device->self_test = ioat3_dma_self_test;
+ dma = &device->common;
+ dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+ dma->device_issue_pending = ioat2_issue_pending;
+ dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+ dma->device_free_chan_resources = ioat2_free_chan_resources;
+
+ dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
+ dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
+
+ cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
+ if (cap & IOAT_CAP_XOR) {
+ is_raid_device = true;
+ dma->max_xor = 8;
+ dma->xor_align = 2;
+
+ dma_cap_set(DMA_XOR, dma->cap_mask);
+ dma->device_prep_dma_xor = ioat3_prep_xor;
+
+ dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+ dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
+ }
+ if (cap & IOAT_CAP_PQ) {
+ is_raid_device = true;
+ dma_set_maxpq(dma, 8, 0);
+ dma->pq_align = 2;
+
+ dma_cap_set(DMA_PQ, dma->cap_mask);
+ dma->device_prep_dma_pq = ioat3_prep_pq;
+
+ dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
+ dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
+
+ if (!(cap & IOAT_CAP_XOR)) { /* no xor capability: provide xor through the pq routines */
+ dma->max_xor = 8;
+ dma->xor_align = 2;
+
+ dma_cap_set(DMA_XOR, dma->cap_mask);
+ dma->device_prep_dma_xor = ioat3_prep_pqxor;
+
+ dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+ dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
+ }
+ }
+ if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) { /* memset is offered only alongside xor/pq */
+ dma_cap_set(DMA_MEMSET, dma->cap_mask);
+ dma->device_prep_dma_memset = ioat3_prep_memset_lock;
+ }
+
+
+ if (is_raid_device) { /* raid-capable devices use the v3 cleanup and timer paths */
+ dma->device_is_tx_complete = ioat3_is_complete;
+ device->cleanup_tasklet = ioat3_cleanup_tasklet;
+ device->timer_fn = ioat3_timer_event;
+ } else {
+ dma->device_is_tx_complete = ioat2_is_complete;
+ device->cleanup_tasklet = ioat2_cleanup_tasklet;
+ device->timer_fn = ioat2_timer_event;
+ }
+
+ /* -= IOAT ver.3 workarounds =- */
+ /* Write CHANERRMSK_INT with 3E07h to mask out the errors
+ * that can cause stability issues for IOAT ver.3
+ */
+ pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
+
+ /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+ * (workaround for spurious config parity error after restart)
+ */
+ pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
+ if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
+ pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
+
+ err = ioat_probe(device);
+ if (err)
+ return err;
+ ioat_set_tcp_copy_break(262144);
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ chan = to_chan_common(c);
+ writel(IOAT_DMA_DCA_ANY_CPU,
+ chan->reg_base + IOAT_DCACTRL_OFFSET);
+ }
+
+ err = ioat_register(device);
+ if (err)
+ return err;
+
+ ioat_kobject_add(device, &ioat2_ktype);
+
+ if (dca)
+ device->dca = ioat3_dca_init(pdev, device->reg_base); /* NOTE(review): result is stored unchecked */
+
+ return 0;
+}
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
new file mode 100644
index 00000000000..99afb12bd40
--- /dev/null
+++ b/drivers/dma/ioat/hw.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_PCI_VID 0x8086
+#define IOAT_MMIO_BAR 0
+
+/* CB device ID's */
+#define IOAT_PCI_DID_5000 0x1A38
+#define IOAT_PCI_DID_CNB 0x360B
+#define IOAT_PCI_DID_SCNB 0x65FF
+#define IOAT_PCI_DID_SNB 0x402F
+
+#define IOAT_PCI_RID 0x00
+#define IOAT_PCI_SVID 0x8086
+#define IOAT_PCI_SID 0x8086
+#define IOAT_VER_1_2 0x12 /* Version 1.2 */
+#define IOAT_VER_2_0 0x20 /* Version 2.0 */
+#define IOAT_VER_3_0 0x30 /* Version 3.0 */
+#define IOAT_VER_3_2 0x32 /* Version 3.2 */
+/* descriptor layout for copy (IOAT_OP_COPY) and null operations: eight 64-bit words */
+struct ioat_dma_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl; /* whole control word; ctl_f below names its bits */
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int null:1;
+ unsigned int src_brk:1;
+ unsigned int dest_brk:1;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int rsvd2:13;
+ #define IOAT_OP_COPY 0x00
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t rsv1;
+ uint64_t rsv2;
+ /* store some driver data in an unused portion of the descriptor */
+ union {
+ uint64_t user1;
+ uint64_t tx_cnt;
+ };
+ uint64_t user2;
+};
+/* descriptor layout for IOAT_OP_FILL: src_data holds the 64-bit fill pattern */
+struct ioat_fill_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl; /* whole control word; ctl_f below names its bits */
+ struct {
+ unsigned int int_en:1;
+ unsigned int rsvd:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int rsvd2:2;
+ unsigned int dest_brk:1;
+ unsigned int bundle:1;
+ unsigned int rsvd4:15;
+ #define IOAT_OP_FILL 0x01
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_data;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t rsv1;
+ uint64_t next_dst_addr;
+ uint64_t user1;
+ uint64_t user2;
+};
+/* primary descriptor for IOAT_OP_XOR / IOAT_OP_XOR_VAL: destination plus sources 1-5 */
+struct ioat_xor_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl; /* whole control word; ctl_f below names its bits */
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int rsvd:13;
+ #define IOAT_OP_XOR 0x87
+ #define IOAT_OP_XOR_VAL 0x88
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t src_addr3;
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+};
+/* extension to ioat_xor_descriptor holding sources 6-8 and the next pointer */
+struct ioat_xor_ext_descriptor {
+ uint64_t src_addr6;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t next;
+ uint64_t rsvd[4];
+};
+/* primary descriptor for IOAT_OP_PQ / IOAT_OP_PQ_VAL: sources 1-3, coefficients, P and Q addresses */
+struct ioat_pq_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl; /* whole control word; ctl_f below names its bits */
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int p_disable:1;
+ unsigned int q_disable:1;
+ unsigned int rsvd:11;
+ #define IOAT_OP_PQ 0x89
+ #define IOAT_OP_PQ_VAL 0x8a
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t src_addr3;
+ uint8_t coef[8]; /* one coefficient byte per source slot */
+ uint64_t q_addr;
+};
+/* extension to ioat_pq_descriptor holding sources 4-8 and the next pointer */
+struct ioat_pq_ext_descriptor {
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+ uint64_t src_addr6;
+ uint64_t next;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t rsvd[2];
+};
+/* descriptor layout for IOAT_OP_PQ_UP; a single coefficient is carried in ctl_f.coef */
+struct ioat_pq_update_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl; /* whole control word; ctl_f below names its bits */
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int p_disable:1;
+ unsigned int q_disable:1;
+ unsigned int rsvd:3;
+ unsigned int coef:8;
+ #define IOAT_OP_PQ_UP 0x8b
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t p_src;
+ uint64_t q_src;
+ uint64_t q_addr;
+};
+/* untyped view of a descriptor as eight 64-bit words; xor/pq descriptors are cast to it */
+struct ioat_raw_descriptor {
+ uint64_t field[8];
+};
+#endif
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
new file mode 100644
index 00000000000..d545fae30f3
--- /dev/null
+++ b/drivers/dma/ioat/pci.c
@@ -0,0 +1,210 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+MODULE_VERSION(IOAT_DMA_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+/*
+ * PCI IDs claimed by this driver, grouped by I/OAT hardware generation.
+ * Used as the id_table of ioat_pci_driver and published through
+ * MODULE_DEVICE_TABLE(); the all-zero entry terminates the list.
+ */
+static struct pci_device_id ioat_pci_tbl[] = {
+ /* I/OAT v1 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
+ { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
+
+ /* I/OAT v2 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
+
+ /* I/OAT v3 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+
+ /* I/OAT v3.2 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
+
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
+
+static int __devinit ioat_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id);
+static void __devexit ioat_remove(struct pci_dev *pdev);
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
+
+/*
+ * Slab cache for struct ioat_ring_ent objects; created in
+ * ioat_init_module() and destroyed in ioat_exit_module(). Not static, so
+ * presumably consumed by the other files of this driver.
+ */
+struct kmem_cache *ioat2_cache;
+
+/* name used for the PCI driver, the MMIO region request and the load banner */
+#define DRV_NAME "ioatdma"
+
+/* PCI driver glue: binds ioat_pci_tbl devices to ioat_pci_probe/ioat_remove */
+static struct pci_driver ioat_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = ioat_pci_tbl,
+ .probe = ioat_pci_probe,
+ .remove = __devexit_p(ioat_remove),
+};
+
+/*
+ * alloc_ioatdma - allocate the zeroed, device-managed per-device state
+ * @pdev: PCI device that owns the allocation
+ * @iobase: mapped MMIO base, stored as the register base
+ *
+ * Returns the new ioatdma_device, or NULL if the allocation fails.
+ */
+static struct ioatdma_device *
+alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct ioatdma_device *ioat;
+
+	ioat = devm_kzalloc(&pdev->dev, sizeof(*ioat), GFP_KERNEL);
+	if (ioat) {
+		ioat->pdev = pdev;
+		ioat->reg_base = iobase;
+	}
+
+	return ioat;
+}
+
+/**
+ * ioat_pci_probe - bind the driver to one I/OAT PCI function
+ * @pdev: PCI device being probed
+ * @id: matching entry from ioat_pci_tbl (not used here)
+ *
+ * Enables the device, maps the MMIO BAR, selects 64-bit streaming and
+ * consistent DMA masks (falling back to 32-bit), allocates the per-device
+ * state and hands off to the version-specific probe routine selected by
+ * the hardware version register.
+ *
+ * Everything acquired directly in this function is device-managed
+ * (pcim_ and devm_ calls), so the error paths simply return.
+ *
+ * Returns 0 on success, the error from the failing setup call, -ENOMEM if
+ * the iomap table or the device state cannot be allocated, or -ENODEV if
+ * the version is unsupported or the version-specific probe fails.
+ */
+static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	void __iomem * const *iomap;
+	struct device *dev = &pdev->dev;
+	struct ioatdma_device *device;
+	int err;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
+	if (err)
+		return err;
+	iomap = pcim_iomap_table(pdev);
+	if (!iomap)
+		return -ENOMEM;
+
+	/* prefer 64-bit addressing, fall back to 32-bit if it is refused */
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err)
+		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (err)
+		return err;
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (err)
+		return err;
+
+	pci_set_master(pdev);
+
+	/*
+	 * alloc_ioatdma() is the only allocation of the device state; a
+	 * separate devm_kzalloc() here would just be overwritten and stay
+	 * pinned until the device is detached.
+	 */
+	device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
+	if (!device)
+		return -ENOMEM;
+	pci_set_drvdata(pdev, device);
+
+	/* dispatch on the hardware version read from the mapped registers */
+	device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+	if (device->version == IOAT_VER_1_2)
+		err = ioat1_dma_probe(device, ioat_dca_enabled);
+	else if (device->version == IOAT_VER_2_0)
+		err = ioat2_dma_probe(device, ioat_dca_enabled);
+	else if (device->version >= IOAT_VER_3_0)
+		err = ioat3_dma_probe(device, ioat_dca_enabled);
+	else
+		return -ENODEV;
+
+	if (err) {
+		dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * ioat_remove - PCI remove callback
+ * @pdev: PCI device being unbound
+ *
+ * Does nothing when no driver data is attached. Otherwise unregisters and
+ * frees the DCA provider, if one is set, and then removes the DMA device.
+ */
+static void __devexit ioat_remove(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ioatdma_device *ioat = pci_get_drvdata(pdev);
+
+	if (ioat) {
+		dev_err(dev, "Removing dma and dca services\n");
+
+		/* the DCA provider goes first, the DMA device after it */
+		if (ioat->dca) {
+			unregister_dca_provider(ioat->dca, dev);
+			free_dca_provider(ioat->dca);
+			ioat->dca = NULL;
+		}
+
+		ioat_dma_remove(ioat);
+	}
+}
+
+/*
+ * Module load: print the driver banner, create the ioat2_cache slab cache
+ * and register the PCI driver.
+ *
+ * Returns 0 on success, -ENOMEM if the cache cannot be created, or the
+ * error from pci_register_driver() (after destroying the cache again).
+ */
+static int __init ioat_init_module(void)
+{
+	int rc;
+
+	pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
+		DRV_NAME, IOAT_DMA_VERSION);
+
+	ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
+					0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!ioat2_cache)
+		return -ENOMEM;
+
+	rc = pci_register_driver(&ioat_pci_driver);
+	if (rc == 0)
+		return 0;
+
+	/* registration failed, so undo the cache creation before bailing out */
+	kmem_cache_destroy(ioat2_cache);
+	return rc;
+}
+module_init(ioat_init_module);
+
+/*
+ * Module unload: unregister the PCI driver first, then destroy the
+ * ioat2_cache slab cache that ioat_init_module() created.
+ */
+static void __exit ioat_exit_module(void)
+{
+ pci_unregister_driver(&ioat_pci_driver);
+ kmem_cache_destroy(ioat2_cache);
+}
+module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioat/registers.h
index 49bc277424f..63038e18ab0 100644
--- a/drivers/dma/ioatdma_registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -64,18 +64,37 @@
#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS 0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008
+
+#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */
+#define IOAT_CAP_PAGE_BREAK 0x00000001
+#define IOAT_CAP_CRC 0x00000002
+#define IOAT_CAP_SKIP_MARKER 0x00000004
+#define IOAT_CAP_DCA 0x00000010
+#define IOAT_CAP_CRC_MOVE 0x00000020
+#define IOAT_CAP_FILL_BLOCK 0x00000040
+#define IOAT_CAP_APIC 0x00000080
+#define IOAT_CAP_XOR 0x00000100
+#define IOAT_CAP_PQ 0x00000200
#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
/* DMA Channel Registers */
#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */
#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200
#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
-#define IOAT_CHANCTRL_INT_DISABLE 0x0001
+#define IOAT_CHANCTRL_INT_REARM 0x0001
+#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\
+ IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+ IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\
+ IOAT_CHANCTRL_ERR_INT_EN)
#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */
#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
@@ -94,14 +113,14 @@
#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C
#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
-#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F
-#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010
-#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR 0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL
+#define IOAT_CHANSTS_STATUS 0x7ULL
+#define IOAT_CHANSTS_ACTIVE 0x0
+#define IOAT_CHANSTS_DONE 0x1
+#define IOAT_CHANSTS_SUSPENDED 0x2
+#define IOAT_CHANSTS_HALTED 0x3
@@ -204,22 +223,27 @@
#define IOAT_CDAR_OFFSET_HIGH 0x24
#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */
-#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001
-#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002
-#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004
-#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008
+#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008
#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010
#define IOAT_CHANERR_CHANCMD_ERR 0x0020
#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040
#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080
#define IOAT_CHANERR_READ_DATA_ERR 0x0100
#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200
-#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400
-#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800
+#define IOAT_CHANERR_CONTROL_ERR 0x0400
+#define IOAT_CHANERR_LENGTH_ERR 0x0800
#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000
#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
#define IOAT_CHANERR_SOFT_ERR 0x4000
#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000
+#define IOAT_CHANERR_XOR_Q_ERR 0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
deleted file mode 100644
index a600fc0f796..00000000000
--- a/drivers/dma/ioat_dma.c
+++ /dev/null
@@ -1,1741 +0,0 @@
-/*
- * Intel I/OAT DMA Linux driver
- * Copyright(c) 2004 - 2009 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- */
-
-/*
- * This driver supports an Intel I/OAT DMA engine, which does asynchronous
- * copy operations.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/workqueue.h>
-#include <linux/i7300_idle.h>
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
-#include "ioatdma_hw.h"
-
-#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
-#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
-#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
-#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
-
-#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
-static int ioat_pending_level = 4;
-module_param(ioat_pending_level, int, 0644);
-MODULE_PARM_DESC(ioat_pending_level,
- "high-water mark for pushing ioat descriptors (default: 4)");
-
-#define RESET_DELAY msecs_to_jiffies(100)
-#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000))
-static void ioat_dma_chan_reset_part2(struct work_struct *work);
-static void ioat_dma_chan_watchdog(struct work_struct *work);
-
-/*
- * workaround for IOAT ver.3.0 null descriptor issue
- * (channel returns error when size is 0)
- */
-#define NULL_DESC_BUFFER_SIZE 1
-
-/* internal functions */
-static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
-static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
-
-static struct ioat_desc_sw *
-ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
-static struct ioat_desc_sw *
-ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
-
-static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
- struct ioatdma_device *device,
- int index)
-{
- return device->idx[index];
-}
-
-/**
- * ioat_dma_do_interrupt - handler used for single vector interrupt mode
- * @irq: interrupt id
- * @data: interrupt data
- */
-static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
-{
- struct ioatdma_device *instance = data;
- struct ioat_dma_chan *ioat_chan;
- unsigned long attnstatus;
- int bit;
- u8 intrctrl;
-
- intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
-
- if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
- return IRQ_NONE;
-
- if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
- writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
- return IRQ_NONE;
- }
-
- attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
- for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
- ioat_chan = ioat_lookup_chan_by_index(instance, bit);
- tasklet_schedule(&ioat_chan->cleanup_task);
- }
-
- writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
- return IRQ_HANDLED;
-}
-
-/**
- * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
- * @irq: interrupt id
- * @data: interrupt data
- */
-static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
-{
- struct ioat_dma_chan *ioat_chan = data;
-
- tasklet_schedule(&ioat_chan->cleanup_task);
-
- return IRQ_HANDLED;
-}
-
-static void ioat_dma_cleanup_tasklet(unsigned long data);
-
-/**
- * ioat_dma_enumerate_channels - find and initialize the device's channels
- * @device: the device to be enumerated
- */
-static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
-{
- u8 xfercap_scale;
- u32 xfercap;
- int i;
- struct ioat_dma_chan *ioat_chan;
-
- /*
- * IOAT ver.3 workarounds
- */
- if (device->version == IOAT_VER_3_0) {
- u32 chan_err_mask;
- u16 dev_id;
- u32 dmauncerrsts;
-
- /*
- * Write CHANERRMSK_INT with 3E07h to mask out the errors
- * that can cause stability issues for IOAT ver.3
- */
- chan_err_mask = 0x3E07;
- pci_write_config_dword(device->pdev,
- IOAT_PCI_CHANERRMASK_INT_OFFSET,
- chan_err_mask);
-
- /*
- * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
- * (workaround for spurious config parity error after restart)
- */
- pci_read_config_word(device->pdev,
- IOAT_PCI_DEVICE_ID_OFFSET,
- &dev_id);
- if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
- dmauncerrsts = 0x10;
- pci_write_config_dword(device->pdev,
- IOAT_PCI_DMAUNCERRSTS_OFFSET,
- dmauncerrsts);
- }
- }
-
- device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
- xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
- xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
-
-#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
- if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) {
- device->common.chancnt--;
- }
-#endif
- for (i = 0; i < device->common.chancnt; i++) {
- ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
- if (!ioat_chan) {
- device->common.chancnt = i;
- break;
- }
-
- ioat_chan->device = device;
- ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
- ioat_chan->xfercap = xfercap;
- ioat_chan->desccount = 0;
- INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
- if (ioat_chan->device->version == IOAT_VER_2_0)
- writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE |
- IOAT_DMA_DCA_ANY_CPU,
- ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
- else if (ioat_chan->device->version == IOAT_VER_3_0)
- writel(IOAT_DMA_DCA_ANY_CPU,
- ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
- spin_lock_init(&ioat_chan->cleanup_lock);
- spin_lock_init(&ioat_chan->desc_lock);
- INIT_LIST_HEAD(&ioat_chan->free_desc);
- INIT_LIST_HEAD(&ioat_chan->used_desc);
- /* This should be made common somewhere in dmaengine.c */
- ioat_chan->common.device = &device->common;
- list_add_tail(&ioat_chan->common.device_node,
- &device->common.channels);
- device->idx[i] = ioat_chan;
- tasklet_init(&ioat_chan->cleanup_task,
- ioat_dma_cleanup_tasklet,
- (unsigned long) ioat_chan);
- tasklet_disable(&ioat_chan->cleanup_task);
- }
- return device->common.chancnt;
-}
-
-/**
- * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
- * descriptors to hw
- * @chan: DMA channel handle
- */
-static inline void __ioat1_dma_memcpy_issue_pending(
- struct ioat_dma_chan *ioat_chan)
-{
- ioat_chan->pending = 0;
- writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
-}
-
-static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-
- if (ioat_chan->pending > 0) {
- spin_lock_bh(&ioat_chan->desc_lock);
- __ioat1_dma_memcpy_issue_pending(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
- }
-}
-
-static inline void __ioat2_dma_memcpy_issue_pending(
- struct ioat_dma_chan *ioat_chan)
-{
- ioat_chan->pending = 0;
- writew(ioat_chan->dmacount,
- ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
-}
-
-static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-
- if (ioat_chan->pending > 0) {
- spin_lock_bh(&ioat_chan->desc_lock);
- __ioat2_dma_memcpy_issue_pending(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
- }
-}
-
-
-/**
- * ioat_dma_chan_reset_part2 - reinit the channel after a reset
- */
-static void ioat_dma_chan_reset_part2(struct work_struct *work)
-{
- struct ioat_dma_chan *ioat_chan =
- container_of(work, struct ioat_dma_chan, work.work);
- struct ioat_desc_sw *desc;
-
- spin_lock_bh(&ioat_chan->cleanup_lock);
- spin_lock_bh(&ioat_chan->desc_lock);
-
- ioat_chan->completion_virt->low = 0;
- ioat_chan->completion_virt->high = 0;
- ioat_chan->pending = 0;
-
- /*
- * count the descriptors waiting, and be sure to do it
- * right for both the CB1 line and the CB2 ring
- */
- ioat_chan->dmacount = 0;
- if (ioat_chan->used_desc.prev) {
- desc = to_ioat_desc(ioat_chan->used_desc.prev);
- do {
- ioat_chan->dmacount++;
- desc = to_ioat_desc(desc->node.next);
- } while (&desc->node != ioat_chan->used_desc.next);
- }
-
- /*
- * write the new starting descriptor address
- * this puts channel engine into ARMED state
- */
- desc = to_ioat_desc(ioat_chan->used_desc.prev);
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
- ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
- writel(((u64) desc->async_tx.phys) >> 32,
- ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
-
- writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
- + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
- break;
- case IOAT_VER_2_0:
- writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
- ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
- writel(((u64) desc->async_tx.phys) >> 32,
- ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
-
- /* tell the engine to go with what's left to be done */
- writew(ioat_chan->dmacount,
- ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
-
- break;
- }
- dev_err(&ioat_chan->device->pdev->dev,
- "chan%d reset - %d descs waiting, %d total desc\n",
- chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
-
- spin_unlock_bh(&ioat_chan->desc_lock);
- spin_unlock_bh(&ioat_chan->cleanup_lock);
-}
-
-/**
- * ioat_dma_reset_channel - restart a channel
- * @ioat_chan: IOAT DMA channel handle
- */
-static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
-{
- u32 chansts, chanerr;
-
- if (!ioat_chan->used_desc.prev)
- return;
-
- chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
- chansts = (ioat_chan->completion_virt->low
- & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
- if (chanerr) {
- dev_err(&ioat_chan->device->pdev->dev,
- "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
- chan_num(ioat_chan), chansts, chanerr);
- writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
- }
-
- /*
- * whack it upside the head with a reset
- * and wait for things to settle out.
- * force the pending count to a really big negative
- * to make sure no one forces an issue_pending
- * while we're waiting.
- */
-
- spin_lock_bh(&ioat_chan->desc_lock);
- ioat_chan->pending = INT_MIN;
- writeb(IOAT_CHANCMD_RESET,
- ioat_chan->reg_base
- + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- /* schedule the 2nd half instead of sleeping a long time */
- schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
-}
-
-/**
- * ioat_dma_chan_watchdog - watch for stuck channels
- */
-static void ioat_dma_chan_watchdog(struct work_struct *work)
-{
- struct ioatdma_device *device =
- container_of(work, struct ioatdma_device, work.work);
- struct ioat_dma_chan *ioat_chan;
- int i;
-
- union {
- u64 full;
- struct {
- u32 low;
- u32 high;
- };
- } completion_hw;
- unsigned long compl_desc_addr_hw;
-
- for (i = 0; i < device->common.chancnt; i++) {
- ioat_chan = ioat_lookup_chan_by_index(device, i);
-
- if (ioat_chan->device->version == IOAT_VER_1_2
- /* have we started processing anything yet */
- && ioat_chan->last_completion
- /* have we completed any since last watchdog cycle? */
- && (ioat_chan->last_completion ==
- ioat_chan->watchdog_completion)
- /* has TCP stuck on one cookie since last watchdog? */
- && (ioat_chan->watchdog_tcp_cookie ==
- ioat_chan->watchdog_last_tcp_cookie)
- && (ioat_chan->watchdog_tcp_cookie !=
- ioat_chan->completed_cookie)
- /* is there something in the chain to be processed? */
- /* CB1 chain always has at least the last one processed */
- && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
- && ioat_chan->pending == 0) {
-
- /*
- * check CHANSTS register for completed
- * descriptor address.
- * if it is different than completion writeback,
- * it is not zero
- * and it has changed since the last watchdog
- * we can assume that channel
- * is still working correctly
- * and the problem is in completion writeback.
- * update completion writeback
- * with actual CHANSTS value
- * else
- * try resetting the channel
- */
-
- completion_hw.low = readl(ioat_chan->reg_base +
- IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
- completion_hw.high = readl(ioat_chan->reg_base +
- IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
-#if (BITS_PER_LONG == 64)
- compl_desc_addr_hw =
- completion_hw.full
- & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
-#else
- compl_desc_addr_hw =
- completion_hw.low & IOAT_LOW_COMPLETION_MASK;
-#endif
-
- if ((compl_desc_addr_hw != 0)
- && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
- && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
- ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
- ioat_chan->completion_virt->low = completion_hw.low;
- ioat_chan->completion_virt->high = completion_hw.high;
- } else {
- ioat_dma_reset_channel(ioat_chan);
- ioat_chan->watchdog_completion = 0;
- ioat_chan->last_compl_desc_addr_hw = 0;
- }
-
- /*
- * for version 2.0 if there are descriptors yet to be processed
- * and the last completed hasn't changed since the last watchdog
- * if they haven't hit the pending level
- * issue the pending to push them through
- * else
- * try resetting the channel
- */
- } else if (ioat_chan->device->version == IOAT_VER_2_0
- && ioat_chan->used_desc.prev
- && ioat_chan->last_completion
- && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
-
- if (ioat_chan->pending < ioat_pending_level)
- ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
- else {
- ioat_dma_reset_channel(ioat_chan);
- ioat_chan->watchdog_completion = 0;
- }
- } else {
- ioat_chan->last_compl_desc_addr_hw = 0;
- ioat_chan->watchdog_completion
- = ioat_chan->last_completion;
- }
-
- ioat_chan->watchdog_last_tcp_cookie =
- ioat_chan->watchdog_tcp_cookie;
- }
-
- schedule_delayed_work(&device->work, WATCHDOG_DELAY);
-}
-
-static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
- struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
- struct ioat_desc_sw *prev, *new;
- struct ioat_dma_descriptor *hw;
- dma_cookie_t cookie;
- LIST_HEAD(new_chain);
- u32 copy;
- size_t len;
- dma_addr_t src, dst;
- unsigned long orig_flags;
- unsigned int desc_count = 0;
-
- /* src and dest and len are stored in the initial descriptor */
- len = first->len;
- src = first->src;
- dst = first->dst;
- orig_flags = first->async_tx.flags;
- new = first;
-
- spin_lock_bh(&ioat_chan->desc_lock);
- prev = to_ioat_desc(ioat_chan->used_desc.prev);
- prefetch(prev->hw);
- do {
- copy = min_t(size_t, len, ioat_chan->xfercap);
-
- async_tx_ack(&new->async_tx);
-
- hw = new->hw;
- hw->size = copy;
- hw->ctl = 0;
- hw->src_addr = src;
- hw->dst_addr = dst;
- hw->next = 0;
-
- /* chain together the physical address list for the HW */
- wmb();
- prev->hw->next = (u64) new->async_tx.phys;
-
- len -= copy;
- dst += copy;
- src += copy;
-
- list_add_tail(&new->node, &new_chain);
- desc_count++;
- prev = new;
- } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
-
- if (!new) {
- dev_err(&ioat_chan->device->pdev->dev,
- "tx submit failed\n");
- spin_unlock_bh(&ioat_chan->desc_lock);
- return -ENOMEM;
- }
-
- hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
- if (first->async_tx.callback) {
- hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
- if (first != new) {
- /* move callback into to last desc */
- new->async_tx.callback = first->async_tx.callback;
- new->async_tx.callback_param
- = first->async_tx.callback_param;
- first->async_tx.callback = NULL;
- first->async_tx.callback_param = NULL;
- }
- }
-
- new->tx_cnt = desc_count;
- new->async_tx.flags = orig_flags; /* client is in control of this ack */
-
- /* store the original values for use in later cleanup */
- if (new != first) {
- new->src = first->src;
- new->dst = first->dst;
- new->len = first->len;
- }
-
- /* cookie incr and addition to used_list must be atomic */
- cookie = ioat_chan->common.cookie;
- cookie++;
- if (cookie < 0)
- cookie = 1;
- ioat_chan->common.cookie = new->async_tx.cookie = cookie;
-
- /* write address into NextDescriptor field of last desc in chain */
- to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
- first->async_tx.phys;
- list_splice_tail(&new_chain, &ioat_chan->used_desc);
-
- ioat_chan->dmacount += desc_count;
- ioat_chan->pending += desc_count;
- if (ioat_chan->pending >= ioat_pending_level)
- __ioat1_dma_memcpy_issue_pending(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- return cookie;
-}
-
-static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
- struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
- struct ioat_desc_sw *new;
- struct ioat_dma_descriptor *hw;
- dma_cookie_t cookie;
- u32 copy;
- size_t len;
- dma_addr_t src, dst;
- unsigned long orig_flags;
- unsigned int desc_count = 0;
-
- /* src and dest and len are stored in the initial descriptor */
- len = first->len;
- src = first->src;
- dst = first->dst;
- orig_flags = first->async_tx.flags;
- new = first;
-
- /*
- * ioat_chan->desc_lock is still in force in version 2 path
- * it gets unlocked at end of this function
- */
- do {
- copy = min_t(size_t, len, ioat_chan->xfercap);
-
- async_tx_ack(&new->async_tx);
-
- hw = new->hw;
- hw->size = copy;
- hw->ctl = 0;
- hw->src_addr = src;
- hw->dst_addr = dst;
-
- len -= copy;
- dst += copy;
- src += copy;
- desc_count++;
- } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
-
- if (!new) {
- dev_err(&ioat_chan->device->pdev->dev,
- "tx submit failed\n");
- spin_unlock_bh(&ioat_chan->desc_lock);
- return -ENOMEM;
- }
-
- hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
- if (first->async_tx.callback) {
- hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
- if (first != new) {
- /* move callback into to last desc */
- new->async_tx.callback = first->async_tx.callback;
- new->async_tx.callback_param
- = first->async_tx.callback_param;
- first->async_tx.callback = NULL;
- first->async_tx.callback_param = NULL;
- }
- }
-
- new->tx_cnt = desc_count;
- new->async_tx.flags = orig_flags; /* client is in control of this ack */
-
- /* store the original values for use in later cleanup */
- if (new != first) {
- new->src = first->src;
- new->dst = first->dst;
- new->len = first->len;
- }
-
- /* cookie incr and addition to used_list must be atomic */
- cookie = ioat_chan->common.cookie;
- cookie++;
- if (cookie < 0)
- cookie = 1;
- ioat_chan->common.cookie = new->async_tx.cookie = cookie;
-
- ioat_chan->dmacount += desc_count;
- ioat_chan->pending += desc_count;
- if (ioat_chan->pending >= ioat_pending_level)
- __ioat2_dma_memcpy_issue_pending(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- return cookie;
-}
-
-/**
- * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
- * @ioat_chan: the channel supplying the memory pool for the descriptors
- * @flags: allocation flags
- */
-static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
- struct ioat_dma_chan *ioat_chan,
- gfp_t flags)
-{
- struct ioat_dma_descriptor *desc;
- struct ioat_desc_sw *desc_sw;
- struct ioatdma_device *ioatdma_device;
- dma_addr_t phys;
-
- ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
- desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
- if (unlikely(!desc))
- return NULL;
-
- desc_sw = kzalloc(sizeof(*desc_sw), flags);
- if (unlikely(!desc_sw)) {
- pci_pool_free(ioatdma_device->dma_pool, desc, phys);
- return NULL;
- }
-
- memset(desc, 0, sizeof(*desc));
- dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- desc_sw->async_tx.tx_submit = ioat1_tx_submit;
- break;
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- desc_sw->async_tx.tx_submit = ioat2_tx_submit;
- break;
- }
-
- desc_sw->hw = desc;
- desc_sw->async_tx.phys = phys;
-
- return desc_sw;
-}
-
-static int ioat_initial_desc_count = 256;
-module_param(ioat_initial_desc_count, int, 0644);
-MODULE_PARM_DESC(ioat_initial_desc_count,
- "initial descriptors per channel (default: 256)");
-
-/**
- * ioat2_dma_massage_chan_desc - link the descriptors into a circle
- * @ioat_chan: the channel to be massaged
- */
-static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
-{
- struct ioat_desc_sw *desc, *_desc;
-
- /* setup used_desc */
- ioat_chan->used_desc.next = ioat_chan->free_desc.next;
- ioat_chan->used_desc.prev = NULL;
-
- /* pull free_desc out of the circle so that every node is a hw
- * descriptor, but leave it pointing to the list
- */
- ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
- ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;
-
- /* circle link the hw descriptors */
- desc = to_ioat_desc(ioat_chan->free_desc.next);
- desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
- list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
- desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
- }
-}
-
-/**
- * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
- * @chan: the channel to be filled out
- */
-static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
- struct ioat_desc_sw *desc;
- u16 chanctrl;
- u32 chanerr;
- int i;
- LIST_HEAD(tmp_list);
-
- /* have we already been set up? */
- if (!list_empty(&ioat_chan->free_desc))
- return ioat_chan->desccount;
-
- /* Setup register to interrupt and write completion status on error */
- chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
- IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
- IOAT_CHANCTRL_ERR_COMPLETION_EN;
- writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
-
- chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
- if (chanerr) {
- dev_err(&ioat_chan->device->pdev->dev,
- "CHANERR = %x, clearing\n", chanerr);
- writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
- }
-
- /* Allocate descriptors */
- for (i = 0; i < ioat_initial_desc_count; i++) {
- desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
- if (!desc) {
- dev_err(&ioat_chan->device->pdev->dev,
- "Only %d initial descriptors\n", i);
- break;
- }
- list_add_tail(&desc->node, &tmp_list);
- }
- spin_lock_bh(&ioat_chan->desc_lock);
- ioat_chan->desccount = i;
- list_splice(&tmp_list, &ioat_chan->free_desc);
- if (ioat_chan->device->version != IOAT_VER_1_2)
- ioat2_dma_massage_chan_desc(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- /* allocate a completion writeback area */
- /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
- ioat_chan->completion_virt =
- pci_pool_alloc(ioat_chan->device->completion_pool,
- GFP_KERNEL,
- &ioat_chan->completion_addr);
- memset(ioat_chan->completion_virt, 0,
- sizeof(*ioat_chan->completion_virt));
- writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
- ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
- writel(((u64) ioat_chan->completion_addr) >> 32,
- ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
-
- tasklet_enable(&ioat_chan->cleanup_task);
- ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */
- return ioat_chan->desccount;
-}
-
-/**
- * ioat_dma_free_chan_resources - release all the descriptors
- * @chan: the channel to be cleaned
- */
-static void ioat_dma_free_chan_resources(struct dma_chan *chan)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
- struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
- struct ioat_desc_sw *desc, *_desc;
- int in_use_descs = 0;
-
- /* Before freeing channel resources first check
- * if they have been previously allocated for this channel.
- */
- if (ioat_chan->desccount == 0)
- return;
-
- tasklet_disable(&ioat_chan->cleanup_task);
- ioat_dma_memcpy_cleanup(ioat_chan);
-
- /* Delay 100ms after reset to allow internal DMA logic to quiesce
- * before removing DMA descriptor resources.
- */
- writeb(IOAT_CHANCMD_RESET,
- ioat_chan->reg_base
- + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
- mdelay(100);
-
- spin_lock_bh(&ioat_chan->desc_lock);
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- list_for_each_entry_safe(desc, _desc,
- &ioat_chan->used_desc, node) {
- in_use_descs++;
- list_del(&desc->node);
- pci_pool_free(ioatdma_device->dma_pool, desc->hw,
- desc->async_tx.phys);
- kfree(desc);
- }
- list_for_each_entry_safe(desc, _desc,
- &ioat_chan->free_desc, node) {
- list_del(&desc->node);
- pci_pool_free(ioatdma_device->dma_pool, desc->hw,
- desc->async_tx.phys);
- kfree(desc);
- }
- break;
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- list_for_each_entry_safe(desc, _desc,
- ioat_chan->free_desc.next, node) {
- list_del(&desc->node);
- pci_pool_free(ioatdma_device->dma_pool, desc->hw,
- desc->async_tx.phys);
- kfree(desc);
- }
- desc = to_ioat_desc(ioat_chan->free_desc.next);
- pci_pool_free(ioatdma_device->dma_pool, desc->hw,
- desc->async_tx.phys);
- kfree(desc);
- INIT_LIST_HEAD(&ioat_chan->free_desc);
- INIT_LIST_HEAD(&ioat_chan->used_desc);
- break;
- }
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- pci_pool_free(ioatdma_device->completion_pool,
- ioat_chan->completion_virt,
- ioat_chan->completion_addr);
-
- /* one is ok since we left it on there on purpose */
- if (in_use_descs > 1)
- dev_err(&ioat_chan->device->pdev->dev,
- "Freeing %d in use descriptors!\n",
- in_use_descs - 1);
-
- ioat_chan->last_completion = ioat_chan->completion_addr = 0;
- ioat_chan->pending = 0;
- ioat_chan->dmacount = 0;
- ioat_chan->desccount = 0;
- ioat_chan->watchdog_completion = 0;
- ioat_chan->last_compl_desc_addr_hw = 0;
- ioat_chan->watchdog_tcp_cookie =
- ioat_chan->watchdog_last_tcp_cookie = 0;
-}
-
-/**
- * ioat_dma_get_next_descriptor - return the next available descriptor
- * @ioat_chan: IOAT DMA channel handle
- *
- * Gets the next descriptor from the chain, and must be called with the
- * channel's desc_lock held. Allocates more descriptors if the channel
- * has run out.
- */
-static struct ioat_desc_sw *
-ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
-{
- struct ioat_desc_sw *new;
-
- if (!list_empty(&ioat_chan->free_desc)) {
- new = to_ioat_desc(ioat_chan->free_desc.next);
- list_del(&new->node);
- } else {
- /* try to get another desc */
- new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
- if (!new) {
- dev_err(&ioat_chan->device->pdev->dev,
- "alloc failed\n");
- return NULL;
- }
- }
-
- prefetch(new->hw);
- return new;
-}
-
-static struct ioat_desc_sw *
-ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
-{
- struct ioat_desc_sw *new;
-
- /*
- * used.prev points to where to start processing
- * used.next points to next free descriptor
- * if used.prev == NULL, there are none waiting to be processed
- * if used.next == used.prev.prev, there is only one free descriptor,
- * and we need to use it to as a noop descriptor before
- * linking in a new set of descriptors, since the device
- * has probably already read the pointer to it
- */
- if (ioat_chan->used_desc.prev &&
- ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {
-
- struct ioat_desc_sw *desc;
- struct ioat_desc_sw *noop_desc;
- int i;
-
- /* set up the noop descriptor */
- noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
- /* set size to non-zero value (channel returns error when size is 0) */
- noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
- noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
- noop_desc->hw->src_addr = 0;
- noop_desc->hw->dst_addr = 0;
-
- ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
- ioat_chan->pending++;
- ioat_chan->dmacount++;
-
- /* try to get a few more descriptors */
- for (i = 16; i; i--) {
- desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
- if (!desc) {
- dev_err(&ioat_chan->device->pdev->dev,
- "alloc failed\n");
- break;
- }
- list_add_tail(&desc->node, ioat_chan->used_desc.next);
-
- desc->hw->next
- = to_ioat_desc(desc->node.next)->async_tx.phys;
- to_ioat_desc(desc->node.prev)->hw->next
- = desc->async_tx.phys;
- ioat_chan->desccount++;
- }
-
- ioat_chan->used_desc.next = noop_desc->node.next;
- }
- new = to_ioat_desc(ioat_chan->used_desc.next);
- prefetch(new);
- ioat_chan->used_desc.next = new->node.next;
-
- if (ioat_chan->used_desc.prev == NULL)
- ioat_chan->used_desc.prev = &new->node;
-
- prefetch(new->hw);
- return new;
-}
-
-static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
- struct ioat_dma_chan *ioat_chan)
-{
- if (!ioat_chan)
- return NULL;
-
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- return ioat1_dma_get_next_descriptor(ioat_chan);
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- return ioat2_dma_get_next_descriptor(ioat_chan);
- }
- return NULL;
-}
-
-static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
- struct dma_chan *chan,
- dma_addr_t dma_dest,
- dma_addr_t dma_src,
- size_t len,
- unsigned long flags)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
- struct ioat_desc_sw *new;
-
- spin_lock_bh(&ioat_chan->desc_lock);
- new = ioat_dma_get_next_descriptor(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- if (new) {
- new->len = len;
- new->dst = dma_dest;
- new->src = dma_src;
- new->async_tx.flags = flags;
- return &new->async_tx;
- } else {
- dev_err(&ioat_chan->device->pdev->dev,
- "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
- chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
- return NULL;
- }
-}
-
-static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
- struct dma_chan *chan,
- dma_addr_t dma_dest,
- dma_addr_t dma_src,
- size_t len,
- unsigned long flags)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
- struct ioat_desc_sw *new;
-
- spin_lock_bh(&ioat_chan->desc_lock);
- new = ioat2_dma_get_next_descriptor(ioat_chan);
-
- /*
- * leave ioat_chan->desc_lock set in ioat 2 path
- * it will get unlocked at end of tx_submit
- */
-
- if (new) {
- new->len = len;
- new->dst = dma_dest;
- new->src = dma_src;
- new->async_tx.flags = flags;
- return &new->async_tx;
- } else {
- spin_unlock_bh(&ioat_chan->desc_lock);
- dev_err(&ioat_chan->device->pdev->dev,
- "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
- chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
- return NULL;
- }
-}
-
-static void ioat_dma_cleanup_tasklet(unsigned long data)
-{
- struct ioat_dma_chan *chan = (void *)data;
- ioat_dma_memcpy_cleanup(chan);
- writew(IOAT_CHANCTRL_INT_DISABLE,
- chan->reg_base + IOAT_CHANCTRL_OFFSET);
-}
-
-static void
-ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
-{
- if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
- pci_unmap_single(ioat_chan->device->pdev,
- pci_unmap_addr(desc, dst),
- pci_unmap_len(desc, len),
- PCI_DMA_FROMDEVICE);
- else
- pci_unmap_page(ioat_chan->device->pdev,
- pci_unmap_addr(desc, dst),
- pci_unmap_len(desc, len),
- PCI_DMA_FROMDEVICE);
- }
-
- if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
- pci_unmap_single(ioat_chan->device->pdev,
- pci_unmap_addr(desc, src),
- pci_unmap_len(desc, len),
- PCI_DMA_TODEVICE);
- else
- pci_unmap_page(ioat_chan->device->pdev,
- pci_unmap_addr(desc, src),
- pci_unmap_len(desc, len),
- PCI_DMA_TODEVICE);
- }
-}
-
-/**
- * ioat_dma_memcpy_cleanup - cleanup up finished descriptors
- * @chan: ioat channel to be cleaned up
- */
-static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
-{
- unsigned long phys_complete;
- struct ioat_desc_sw *desc, *_desc;
- dma_cookie_t cookie = 0;
- unsigned long desc_phys;
- struct ioat_desc_sw *latest_desc;
-
- prefetch(ioat_chan->completion_virt);
-
- if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
- return;
-
- /* The completion writeback can happen at any time,
- so reads by the driver need to be atomic operations
- The descriptor physical addresses are limited to 32-bits
- when the CPU can only do a 32-bit mov */
-
-#if (BITS_PER_LONG == 64)
- phys_complete =
- ioat_chan->completion_virt->full
- & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
-#else
- phys_complete =
- ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
-#endif
-
- if ((ioat_chan->completion_virt->full
- & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
- IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
- dev_err(&ioat_chan->device->pdev->dev,
- "Channel halted, chanerr = %x\n",
- readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));
-
- /* TODO do something to salvage the situation */
- }
-
- if (phys_complete == ioat_chan->last_completion) {
- spin_unlock_bh(&ioat_chan->cleanup_lock);
- /*
- * perhaps we're stuck so hard that the watchdog can't go off?
- * try to catch it after 2 seconds
- */
- if (ioat_chan->device->version != IOAT_VER_3_0) {
- if (time_after(jiffies,
- ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
- ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
- ioat_chan->last_completion_time = jiffies;
- }
- }
- return;
- }
- ioat_chan->last_completion_time = jiffies;
-
- cookie = 0;
- if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
- spin_unlock_bh(&ioat_chan->cleanup_lock);
- return;
- }
-
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- list_for_each_entry_safe(desc, _desc,
- &ioat_chan->used_desc, node) {
-
- /*
- * Incoming DMA requests may use multiple descriptors,
- * due to exceeding xfercap, perhaps. If so, only the
- * last one will have a cookie, and require unmapping.
- */
- if (desc->async_tx.cookie) {
- cookie = desc->async_tx.cookie;
- ioat_dma_unmap(ioat_chan, desc);
- if (desc->async_tx.callback) {
- desc->async_tx.callback(desc->async_tx.callback_param);
- desc->async_tx.callback = NULL;
- }
- }
-
- if (desc->async_tx.phys != phys_complete) {
- /*
- * a completed entry, but not the last, so clean
- * up if the client is done with the descriptor
- */
- if (async_tx_test_ack(&desc->async_tx)) {
- list_move_tail(&desc->node,
- &ioat_chan->free_desc);
- } else
- desc->async_tx.cookie = 0;
- } else {
- /*
- * last used desc. Do not remove, so we can
- * append from it, but don't look at it next
- * time, either
- */
- desc->async_tx.cookie = 0;
-
- /* TODO check status bits? */
- break;
- }
- }
- break;
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- /* has some other thread has already cleaned up? */
- if (ioat_chan->used_desc.prev == NULL)
- break;
-
- /* work backwards to find latest finished desc */
- desc = to_ioat_desc(ioat_chan->used_desc.next);
- latest_desc = NULL;
- do {
- desc = to_ioat_desc(desc->node.prev);
- desc_phys = (unsigned long)desc->async_tx.phys
- & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
- if (desc_phys == phys_complete) {
- latest_desc = desc;
- break;
- }
- } while (&desc->node != ioat_chan->used_desc.prev);
-
- if (latest_desc != NULL) {
-
- /* work forwards to clear finished descriptors */
- for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
- &desc->node != latest_desc->node.next &&
- &desc->node != ioat_chan->used_desc.next;
- desc = to_ioat_desc(desc->node.next)) {
- if (desc->async_tx.cookie) {
- cookie = desc->async_tx.cookie;
- desc->async_tx.cookie = 0;
- ioat_dma_unmap(ioat_chan, desc);
- if (desc->async_tx.callback) {
- desc->async_tx.callback(desc->async_tx.callback_param);
- desc->async_tx.callback = NULL;
- }
- }
- }
-
- /* move used.prev up beyond those that are finished */
- if (&desc->node == ioat_chan->used_desc.next)
- ioat_chan->used_desc.prev = NULL;
- else
- ioat_chan->used_desc.prev = &desc->node;
- }
- break;
- }
-
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- ioat_chan->last_completion = phys_complete;
- if (cookie != 0)
- ioat_chan->completed_cookie = cookie;
-
- spin_unlock_bh(&ioat_chan->cleanup_lock);
-}
-
-/**
- * ioat_dma_is_complete - poll the status of a IOAT DMA transaction
- * @chan: IOAT DMA channel handle
- * @cookie: DMA transaction identifier
- * @done: if not %NULL, updated with last completed transaction
- * @used: if not %NULL, updated with last used transaction
- */
-static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
- dma_cookie_t cookie,
- dma_cookie_t *done,
- dma_cookie_t *used)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
- dma_cookie_t last_used;
- dma_cookie_t last_complete;
- enum dma_status ret;
-
- last_used = chan->cookie;
- last_complete = ioat_chan->completed_cookie;
- ioat_chan->watchdog_tcp_cookie = cookie;
-
- if (done)
- *done = last_complete;
- if (used)
- *used = last_used;
-
- ret = dma_async_is_complete(cookie, last_complete, last_used);
- if (ret == DMA_SUCCESS)
- return ret;
-
- ioat_dma_memcpy_cleanup(ioat_chan);
-
- last_used = chan->cookie;
- last_complete = ioat_chan->completed_cookie;
-
- if (done)
- *done = last_complete;
- if (used)
- *used = last_used;
-
- return dma_async_is_complete(cookie, last_complete, last_used);
-}
-
-static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
-{
- struct ioat_desc_sw *desc;
-
- spin_lock_bh(&ioat_chan->desc_lock);
-
- desc = ioat_dma_get_next_descriptor(ioat_chan);
-
- if (!desc) {
- dev_err(&ioat_chan->device->pdev->dev,
- "Unable to start null desc - get next desc failed\n");
- spin_unlock_bh(&ioat_chan->desc_lock);
- return;
- }
-
- desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
- | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
- | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
- /* set size to non-zero value (channel returns error when size is 0) */
- desc->hw->size = NULL_DESC_BUFFER_SIZE;
- desc->hw->src_addr = 0;
- desc->hw->dst_addr = 0;
- async_tx_ack(&desc->async_tx);
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- desc->hw->next = 0;
- list_add_tail(&desc->node, &ioat_chan->used_desc);
-
- writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
- ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
- writel(((u64) desc->async_tx.phys) >> 32,
- ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
-
- writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
- + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
- break;
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
- ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
- writel(((u64) desc->async_tx.phys) >> 32,
- ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
-
- ioat_chan->dmacount++;
- __ioat2_dma_memcpy_issue_pending(ioat_chan);
- break;
- }
- spin_unlock_bh(&ioat_chan->desc_lock);
-}
-
-/*
- * Perform a IOAT transaction to verify the HW works.
- */
-#define IOAT_TEST_SIZE 2000
-
-static void ioat_dma_test_callback(void *dma_async_param)
-{
- struct completion *cmp = dma_async_param;
-
- complete(cmp);
-}
-
-/**
- * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
- * @device: device to be tested
- */
-static int ioat_dma_self_test(struct ioatdma_device *device)
-{
- int i;
- u8 *src;
- u8 *dest;
- struct dma_chan *dma_chan;
- struct dma_async_tx_descriptor *tx;
- dma_addr_t dma_dest, dma_src;
- dma_cookie_t cookie;
- int err = 0;
- struct completion cmp;
- unsigned long tmo;
- unsigned long flags;
-
- src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
- if (!src)
- return -ENOMEM;
- dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
- if (!dest) {
- kfree(src);
- return -ENOMEM;
- }
-
- /* Fill in src buffer */
- for (i = 0; i < IOAT_TEST_SIZE; i++)
- src[i] = (u8)i;
-
- /* Start copy, using first DMA channel */
- dma_chan = container_of(device->common.channels.next,
- struct dma_chan,
- device_node);
- if (device->common.device_alloc_chan_resources(dma_chan) < 1) {
- dev_err(&device->pdev->dev,
- "selftest cannot allocate chan resource\n");
- err = -ENODEV;
- goto out;
- }
-
- dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
- DMA_TO_DEVICE);
- dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
- DMA_FROM_DEVICE);
- flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE;
- tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
- IOAT_TEST_SIZE, flags);
- if (!tx) {
- dev_err(&device->pdev->dev,
- "Self-test prep failed, disabling\n");
- err = -ENODEV;
- goto free_resources;
- }
-
- async_tx_ack(tx);
- init_completion(&cmp);
- tx->callback = ioat_dma_test_callback;
- tx->callback_param = &cmp;
- cookie = tx->tx_submit(tx);
- if (cookie < 0) {
- dev_err(&device->pdev->dev,
- "Self-test setup failed, disabling\n");
- err = -ENODEV;
- goto free_resources;
- }
- device->common.device_issue_pending(dma_chan);
-
- tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
-
- if (tmo == 0 ||
- device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
- != DMA_SUCCESS) {
- dev_err(&device->pdev->dev,
- "Self-test copy timed out, disabling\n");
- err = -ENODEV;
- goto free_resources;
- }
- if (memcmp(src, dest, IOAT_TEST_SIZE)) {
- dev_err(&device->pdev->dev,
- "Self-test copy failed compare, disabling\n");
- err = -ENODEV;
- goto free_resources;
- }
-
-free_resources:
- device->common.device_free_chan_resources(dma_chan);
-out:
- kfree(src);
- kfree(dest);
- return err;
-}
-
-static char ioat_interrupt_style[32] = "msix";
-module_param_string(ioat_interrupt_style, ioat_interrupt_style,
- sizeof(ioat_interrupt_style), 0644);
-MODULE_PARM_DESC(ioat_interrupt_style,
- "set ioat interrupt style: msix (default), "
- "msix-single-vector, msi, intx)");
-
-/**
- * ioat_dma_setup_interrupts - setup interrupt handler
- * @device: ioat device
- */
-static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
-{
- struct ioat_dma_chan *ioat_chan;
- int err, i, j, msixcnt;
- u8 intrctrl = 0;
-
- if (!strcmp(ioat_interrupt_style, "msix"))
- goto msix;
- if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
- goto msix_single_vector;
- if (!strcmp(ioat_interrupt_style, "msi"))
- goto msi;
- if (!strcmp(ioat_interrupt_style, "intx"))
- goto intx;
- dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n",
- ioat_interrupt_style);
- goto err_no_irq;
-
-msix:
- /* The number of MSI-X vectors should equal the number of channels */
- msixcnt = device->common.chancnt;
- for (i = 0; i < msixcnt; i++)
- device->msix_entries[i].entry = i;
-
- err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
- if (err < 0)
- goto msi;
- if (err > 0)
- goto msix_single_vector;
-
- for (i = 0; i < msixcnt; i++) {
- ioat_chan = ioat_lookup_chan_by_index(device, i);
- err = request_irq(device->msix_entries[i].vector,
- ioat_dma_do_interrupt_msix,
- 0, "ioat-msix", ioat_chan);
- if (err) {
- for (j = 0; j < i; j++) {
- ioat_chan =
- ioat_lookup_chan_by_index(device, j);
- free_irq(device->msix_entries[j].vector,
- ioat_chan);
- }
- goto msix_single_vector;
- }
- }
- intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
- device->irq_mode = msix_multi_vector;
- goto done;
-
-msix_single_vector:
- device->msix_entries[0].entry = 0;
- err = pci_enable_msix(device->pdev, device->msix_entries, 1);
- if (err)
- goto msi;
-
- err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
- 0, "ioat-msix", device);
- if (err) {
- pci_disable_msix(device->pdev);
- goto msi;
- }
- device->irq_mode = msix_single_vector;
- goto done;
-
-msi:
- err = pci_enable_msi(device->pdev);
- if (err)
- goto intx;
-
- err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
- 0, "ioat-msi", device);
- if (err) {
- pci_disable_msi(device->pdev);
- goto intx;
- }
- /*
- * CB 1.2 devices need a bit set in configuration space to enable MSI
- */
- if (device->version == IOAT_VER_1_2) {
- u32 dmactrl;
- pci_read_config_dword(device->pdev,
- IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
- dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
- pci_write_config_dword(device->pdev,
- IOAT_PCI_DMACTRL_OFFSET, dmactrl);
- }
- device->irq_mode = msi;
- goto done;
-
-intx:
- err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
- IRQF_SHARED, "ioat-intx", device);
- if (err)
- goto err_no_irq;
- device->irq_mode = intx;
-
-done:
- intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
- writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
- return 0;
-
-err_no_irq:
- /* Disable all interrupt generation */
- writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
- dev_err(&device->pdev->dev, "no usable interrupts\n");
- device->irq_mode = none;
- return -1;
-}
-
-/**
- * ioat_dma_remove_interrupts - remove whatever interrupts were set
- * @device: ioat device
- */
-static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
-{
- struct ioat_dma_chan *ioat_chan;
- int i;
-
- /* Disable all interrupt generation */
- writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
-
- switch (device->irq_mode) {
- case msix_multi_vector:
- for (i = 0; i < device->common.chancnt; i++) {
- ioat_chan = ioat_lookup_chan_by_index(device, i);
- free_irq(device->msix_entries[i].vector, ioat_chan);
- }
- pci_disable_msix(device->pdev);
- break;
- case msix_single_vector:
- free_irq(device->msix_entries[0].vector, device);
- pci_disable_msix(device->pdev);
- break;
- case msi:
- free_irq(device->pdev->irq, device);
- pci_disable_msi(device->pdev);
- break;
- case intx:
- free_irq(device->pdev->irq, device);
- break;
- case none:
- dev_warn(&device->pdev->dev,
- "call to %s without interrupts setup\n", __func__);
- }
- device->irq_mode = none;
-}
-
-struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
- void __iomem *iobase)
-{
- int err;
- struct ioatdma_device *device;
-
- device = kzalloc(sizeof(*device), GFP_KERNEL);
- if (!device) {
- err = -ENOMEM;
- goto err_kzalloc;
- }
- device->pdev = pdev;
- device->reg_base = iobase;
- device->version = readb(device->reg_base + IOAT_VER_OFFSET);
-
- /* DMA coherent memory pool for DMA descriptor allocations */
- device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
- sizeof(struct ioat_dma_descriptor),
- 64, 0);
- if (!device->dma_pool) {
- err = -ENOMEM;
- goto err_dma_pool;
- }
-
- device->completion_pool = pci_pool_create("completion_pool", pdev,
- sizeof(u64), SMP_CACHE_BYTES,
- SMP_CACHE_BYTES);
- if (!device->completion_pool) {
- err = -ENOMEM;
- goto err_completion_pool;
- }
-
- INIT_LIST_HEAD(&device->common.channels);
- ioat_dma_enumerate_channels(device);
-
- device->common.device_alloc_chan_resources =
- ioat_dma_alloc_chan_resources;
- device->common.device_free_chan_resources =
- ioat_dma_free_chan_resources;
- device->common.dev = &pdev->dev;
-
- dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
- device->common.device_is_tx_complete = ioat_dma_is_complete;
- switch (device->version) {
- case IOAT_VER_1_2:
- device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
- device->common.device_issue_pending =
- ioat1_dma_memcpy_issue_pending;
- break;
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
- device->common.device_issue_pending =
- ioat2_dma_memcpy_issue_pending;
- break;
- }
-
- dev_err(&device->pdev->dev,
- "Intel(R) I/OAT DMA Engine found,"
- " %d channels, device version 0x%02x, driver version %s\n",
- device->common.chancnt, device->version, IOAT_DMA_VERSION);
-
- if (!device->common.chancnt) {
- dev_err(&device->pdev->dev,
- "Intel(R) I/OAT DMA Engine problem found: "
- "zero channels detected\n");
- goto err_setup_interrupts;
- }
-
- err = ioat_dma_setup_interrupts(device);
- if (err)
- goto err_setup_interrupts;
-
- err = ioat_dma_self_test(device);
- if (err)
- goto err_self_test;
-
- ioat_set_tcp_copy_break(device);
-
- dma_async_device_register(&device->common);
-
- if (device->version != IOAT_VER_3_0) {
- INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
- schedule_delayed_work(&device->work,
- WATCHDOG_DELAY);
- }
-
- return device;
-
-err_self_test:
- ioat_dma_remove_interrupts(device);
-err_setup_interrupts:
- pci_pool_destroy(device->completion_pool);
-err_completion_pool:
- pci_pool_destroy(device->dma_pool);
-err_dma_pool:
- kfree(device);
-err_kzalloc:
- dev_err(&pdev->dev,
- "Intel(R) I/OAT DMA Engine initialization failed\n");
- return NULL;
-}
-
-void ioat_dma_remove(struct ioatdma_device *device)
-{
- struct dma_chan *chan, *_chan;
- struct ioat_dma_chan *ioat_chan;
-
- if (device->version != IOAT_VER_3_0)
- cancel_delayed_work(&device->work);
-
- ioat_dma_remove_interrupts(device);
-
- dma_async_device_unregister(&device->common);
-
- pci_pool_destroy(device->dma_pool);
- pci_pool_destroy(device->completion_pool);
-
- iounmap(device->reg_base);
- pci_release_regions(device->pdev);
- pci_disable_device(device->pdev);
-
- list_for_each_entry_safe(chan, _chan,
- &device->common.channels, device_node) {
- ioat_chan = to_ioat_chan(chan);
- list_del(&chan->device_node);
- kfree(ioat_chan);
- }
- kfree(device);
-}
-
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
deleted file mode 100644
index a52ff4bd460..00000000000
--- a/drivers/dma/ioatdma.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef IOATDMA_H
-#define IOATDMA_H
-
-#include <linux/dmaengine.h>
-#include "ioatdma_hw.h"
-#include <linux/init.h>
-#include <linux/dmapool.h>
-#include <linux/cache.h>
-#include <linux/pci_ids.h>
-#include <net/tcp.h>
-
-#define IOAT_DMA_VERSION "3.64"
-
-enum ioat_interrupt {
- none = 0,
- msix_multi_vector = 1,
- msix_single_vector = 2,
- msi = 3,
- intx = 4,
-};
-
-#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
-#define IOAT_DMA_DCA_ANY_CPU ~0
-#define IOAT_WATCHDOG_PERIOD (2 * HZ)
-
-
-/**
- * struct ioatdma_device - internal representation of a IOAT device
- * @pdev: PCI-Express device
- * @reg_base: MMIO register space base address
- * @dma_pool: for allocating DMA descriptors
- * @common: embedded struct dma_device
- * @version: version of ioatdma device
- * @irq_mode: which style irq to use
- * @msix_entries: irq handlers
- * @idx: per channel data
- */
-
-struct ioatdma_device {
- struct pci_dev *pdev;
- void __iomem *reg_base;
- struct pci_pool *dma_pool;
- struct pci_pool *completion_pool;
- struct dma_device common;
- u8 version;
- enum ioat_interrupt irq_mode;
- struct delayed_work work;
- struct msix_entry msix_entries[4];
- struct ioat_dma_chan *idx[4];
-};
-
-/**
- * struct ioat_dma_chan - internal representation of a DMA channel
- */
-struct ioat_dma_chan {
-
- void __iomem *reg_base;
-
- dma_cookie_t completed_cookie;
- unsigned long last_completion;
- unsigned long last_completion_time;
-
- size_t xfercap; /* XFERCAP register value expanded out */
-
- spinlock_t cleanup_lock;
- spinlock_t desc_lock;
- struct list_head free_desc;
- struct list_head used_desc;
- unsigned long watchdog_completion;
- int watchdog_tcp_cookie;
- u32 watchdog_last_tcp_cookie;
- struct delayed_work work;
-
- int pending;
- int dmacount;
- int desccount;
-
- struct ioatdma_device *device;
- struct dma_chan common;
-
- dma_addr_t completion_addr;
- union {
- u64 full; /* HW completion writeback */
- struct {
- u32 low;
- u32 high;
- };
- } *completion_virt;
- unsigned long last_compl_desc_addr_hw;
- struct tasklet_struct cleanup_task;
-};
-
-/* wrapper around hardware descriptor format + additional software fields */
-
-/**
- * struct ioat_desc_sw - wrapper around hardware descriptor
- * @hw: hardware DMA descriptor
- * @node: this descriptor will either be on the free list,
- * or attached to a transaction list (async_tx.tx_list)
- * @tx_cnt: number of descriptors required to complete the transaction
- * @async_tx: the generic software descriptor for all engines
- */
-struct ioat_desc_sw {
- struct ioat_dma_descriptor *hw;
- struct list_head node;
- int tx_cnt;
- size_t len;
- dma_addr_t src;
- dma_addr_t dst;
- struct dma_async_tx_descriptor async_tx;
-};
-
-static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
-{
- #ifdef CONFIG_NET_DMA
- switch (dev->version) {
- case IOAT_VER_1_2:
- sysctl_tcp_dma_copybreak = 4096;
- break;
- case IOAT_VER_2_0:
- sysctl_tcp_dma_copybreak = 2048;
- break;
- case IOAT_VER_3_0:
- sysctl_tcp_dma_copybreak = 262144;
- break;
- }
- #endif
-}
-
-#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
-struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
- void __iomem *iobase);
-void ioat_dma_remove(struct ioatdma_device *device);
-struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-#else
-#define ioat_dma_probe(pdev, iobase) NULL
-#define ioat_dma_remove(device) do { } while (0)
-#define ioat_dca_init(pdev, iobase) NULL
-#define ioat2_dca_init(pdev, iobase) NULL
-#define ioat3_dca_init(pdev, iobase) NULL
-#endif
-
-#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h
deleted file mode 100644
index afa57eef86c..00000000000
--- a/drivers/dma/ioatdma_hw.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef _IOAT_HW_H_
-#define _IOAT_HW_H_
-
-/* PCI Configuration Space Values */
-#define IOAT_PCI_VID 0x8086
-
-/* CB device ID's */
-#define IOAT_PCI_DID_5000 0x1A38
-#define IOAT_PCI_DID_CNB 0x360B
-#define IOAT_PCI_DID_SCNB 0x65FF
-#define IOAT_PCI_DID_SNB 0x402F
-
-#define IOAT_PCI_RID 0x00
-#define IOAT_PCI_SVID 0x8086
-#define IOAT_PCI_SID 0x8086
-#define IOAT_VER_1_2 0x12 /* Version 1.2 */
-#define IOAT_VER_2_0 0x20 /* Version 2.0 */
-#define IOAT_VER_3_0 0x30 /* Version 3.0 */
-
-struct ioat_dma_descriptor {
- uint32_t size;
- uint32_t ctl;
- uint64_t src_addr;
- uint64_t dst_addr;
- uint64_t next;
- uint64_t rsv1;
- uint64_t rsv2;
- uint64_t user1;
- uint64_t user2;
-};
-
-#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001
-#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002
-#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004
-#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008
-#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010
-#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020
-#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040
-#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080
-#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100
-#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200
-#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400
-
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000
-
-#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000
-
-#endif
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 2f052265122..645ca8d54ec 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -31,6 +31,7 @@
#include <linux/platform_device.h>
#include <linux/memory.h>
#include <linux/ioport.h>
+#include <linux/raid/pq.h>
#include <mach/adma.h>
@@ -57,65 +58,110 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
}
}
+static void
+iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+ struct iop_adma_desc_slot *unmap = desc->group_head;
+ struct device *dev = &iop_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ enum dma_ctrl_flags flags = tx->flags;
+ u32 src_cnt;
+ dma_addr_t addr;
+ dma_addr_t dest;
+
+ src_cnt = unmap->unmap_src_cnt;
+ dest = iop_desc_get_dest_addr(unmap, iop_chan);
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ enum dma_data_direction dir;
+
+ if (src_cnt > 1) /* is xor? */
+ dir = DMA_BIDIRECTIONAL;
+ else
+ dir = DMA_FROM_DEVICE;
+
+ dma_unmap_page(dev, dest, len, dir);
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ while (src_cnt--) {
+ addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
+ if (addr == dest)
+ continue;
+ dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ }
+ }
+ desc->group_head = NULL;
+}
+
+static void
+iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+ struct iop_adma_desc_slot *unmap = desc->group_head;
+ struct device *dev = &iop_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ enum dma_ctrl_flags flags = tx->flags;
+ u32 src_cnt = unmap->unmap_src_cnt;
+ dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
+ dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
+ int i;
+
+ if (tx->flags & DMA_PREP_CONTINUE)
+ src_cnt -= 3;
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
+ dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
+ dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ dma_addr_t addr;
+
+ for (i = 0; i < src_cnt; i++) {
+ addr = iop_desc_get_src_addr(unmap, iop_chan, i);
+ dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ }
+ if (desc->pq_check_result) {
+ dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
+ dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
+ }
+ }
+
+ desc->group_head = NULL;
+}
+
+
static dma_cookie_t
iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
{
- BUG_ON(desc->async_tx.cookie < 0);
- if (desc->async_tx.cookie > 0) {
- cookie = desc->async_tx.cookie;
- desc->async_tx.cookie = 0;
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+ BUG_ON(tx->cookie < 0);
+ if (tx->cookie > 0) {
+ cookie = tx->cookie;
+ tx->cookie = 0;
/* call the callback (must not sleep or submit new
* operations to this channel)
*/
- if (desc->async_tx.callback)
- desc->async_tx.callback(
- desc->async_tx.callback_param);
+ if (tx->callback)
+ tx->callback(tx->callback_param);
/* unmap dma addresses
* (unmap_single vs unmap_page?)
*/
if (desc->group_head && desc->unmap_len) {
- struct iop_adma_desc_slot *unmap = desc->group_head;
- struct device *dev =
- &iop_chan->device->pdev->dev;
- u32 len = unmap->unmap_len;
- enum dma_ctrl_flags flags = desc->async_tx.flags;
- u32 src_cnt;
- dma_addr_t addr;
- dma_addr_t dest;
-
- src_cnt = unmap->unmap_src_cnt;
- dest = iop_desc_get_dest_addr(unmap, iop_chan);
- if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- enum dma_data_direction dir;
-
- if (src_cnt > 1) /* is xor? */
- dir = DMA_BIDIRECTIONAL;
- else
- dir = DMA_FROM_DEVICE;
-
- dma_unmap_page(dev, dest, len, dir);
- }
-
- if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- while (src_cnt--) {
- addr = iop_desc_get_src_addr(unmap,
- iop_chan,
- src_cnt);
- if (addr == dest)
- continue;
- dma_unmap_page(dev, addr, len,
- DMA_TO_DEVICE);
- }
- }
- desc->group_head = NULL;
+ if (iop_desc_is_pq(desc))
+ iop_desc_unmap_pq(iop_chan, desc);
+ else
+ iop_desc_unmap(iop_chan, desc);
}
}
/* run dependent operations */
- dma_run_dependencies(&desc->async_tx);
+ dma_run_dependencies(tx);
return cookie;
}
@@ -287,7 +333,12 @@ static void iop_adma_tasklet(unsigned long data)
{
struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
- spin_lock(&iop_chan->lock);
+ /* lockdep will flag dependency submissions as potentially
+ * recursive locking; this is not the case, as a dependency
+ * submission will never recurse into a channel's submit routine.
+ * There are checks in async_tx.c to prevent this.
+ */
+ spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
__iop_adma_slot_cleanup(iop_chan);
spin_unlock(&iop_chan->lock);
}
@@ -370,7 +421,7 @@ retry:
}
alloc_tail->group_head = alloc_start;
alloc_tail->async_tx.cookie = -EBUSY;
- list_splice(&chain, &alloc_tail->async_tx.tx_list);
+ list_splice(&chain, &alloc_tail->tx_list);
iop_chan->last_used = last_used;
iop_desc_clear_next_desc(alloc_start);
iop_desc_clear_next_desc(alloc_tail);
@@ -429,7 +480,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
old_chain_tail = list_entry(iop_chan->chain.prev,
struct iop_adma_desc_slot, chain_node);
- list_splice_init(&sw_desc->async_tx.tx_list,
+ list_splice_init(&sw_desc->tx_list,
&old_chain_tail->chain_node);
/* fix up the hardware chain */
@@ -496,6 +547,7 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
dma_async_tx_descriptor_init(&slot->async_tx, chan);
slot->async_tx.tx_submit = iop_adma_tx_submit;
+ INIT_LIST_HEAD(&slot->tx_list);
INIT_LIST_HEAD(&slot->chain_node);
INIT_LIST_HEAD(&slot->slot_node);
hw_desc = (char *) iop_chan->device->dma_desc_pool;
@@ -660,9 +712,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
}
static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
- unsigned int src_cnt, size_t len, u32 *result,
- unsigned long flags)
+iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
+ unsigned int src_cnt, size_t len, u32 *result,
+ unsigned long flags)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -696,6 +748,118 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
return sw_desc ? &sw_desc->async_tx : NULL;
}
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *g;
+ int slot_cnt, slots_per_op;
+ int continue_srcs;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+ dev_dbg(iop_chan->device->common.dev,
+ "%s src_cnt: %u len: %zu flags: %lx\n",
+ __func__, src_cnt, len, flags);
+ /* a continuation adds 1 (P disabled) or 3 sources to the slot count */
+ if (dmaf_p_disabled_continue(flags))
+ continue_srcs = 1+src_cnt;
+ else if (dmaf_continue(flags))
+ continue_srcs = 3+src_cnt;
+ else
+ continue_srcs = 0+src_cnt;
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ int i;
+
+ g = sw_desc->group_head;
+ iop_desc_set_byte_count(g, iop_chan, len);
+
+ /* even if P is disabled its destination address (bits
+ * [3:0]) must match Q. It is ok if P points to an
+ * invalid address, it won't be written.
+ */
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ dst[0] = dst[1] & 0x7;
+
+ iop_desc_set_pq_addr(g, dst);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ for (i = 0; i < src_cnt; i++)
+ iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
+
+ /* if we are continuing a previous operation factor in
+ * the old p and q values, see the comment for dma_maxpq
+ * in include/linux/dmaengine.h
+ */
+ if (dmaf_p_disabled_continue(flags))
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+ else if (dmaf_continue(flags)) {
+ iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
+ }
+ iop_desc_init_pq(g, i, flags);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, enum sum_check_flags *pqres,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *g;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+ dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %u len: %zu\n",
+ __func__, src_cnt, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ /* for validate operations p and q are tagged onto the
+ * end of the source list
+ */
+ int pq_idx = src_cnt;
+
+ g = sw_desc->group_head;
+ iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
+ iop_desc_set_pq_zero_sum_byte_count(g, len);
+ g->pq_check_result = pqres;
+ pr_debug("\t%s: g->pq_check_result: %p\n",
+ __func__, g->pq_check_result);
+ sw_desc->unmap_src_cnt = src_cnt+2; /* sources plus p and q */
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ while (src_cnt--)
+ iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
+ src[src_cnt],
+ scf[src_cnt]);
+ iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
static void iop_adma_free_chan_resources(struct dma_chan *chan)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
@@ -906,7 +1070,7 @@ out:
#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
static int __devinit
-iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
+iop_adma_xor_val_self_test(struct iop_adma_device *device)
{
int i, src_idx;
struct page *dest;
@@ -1002,7 +1166,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
PAGE_SIZE, DMA_TO_DEVICE);
/* skip zero sum if the capability is not present */
- if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
+ if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
goto free_resources;
/* zero sum the sources with the destintation page */
@@ -1016,10 +1180,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
dma_srcs[i] = dma_map_page(dma_chan->device->dev,
zero_sum_srcs[i], 0, PAGE_SIZE,
DMA_TO_DEVICE);
- tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
- IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
- &zero_sum_result,
- DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &zero_sum_result,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
@@ -1072,10 +1236,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
dma_srcs[i] = dma_map_page(dma_chan->device->dev,
zero_sum_srcs[i], 0, PAGE_SIZE,
DMA_TO_DEVICE);
- tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
- IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
- &zero_sum_result,
- DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &zero_sum_result,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
@@ -1105,6 +1269,170 @@ out:
return err;
}
+#ifdef CONFIG_MD_RAID6_PQ
+static int __devinit
+iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
+{
+ /* combined sources, software pq results, and extra hw pq results */
+ struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
+ /* ptr to the extra hw pq buffers defined above */
+ struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
+ /* address conversion buffers (dma_map / page_address) */
+ void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
+ dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST+2]; /* +2: p/q follow the srcs */
+ dma_addr_t pq_dest[2];
+
+ int i;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u32 zero_sum_result;
+ int err = 0;
+ struct device *dev;
+
+ dev_dbg(device->common.dev, "%s\n", __func__);
+
+ for (i = 0; i < ARRAY_SIZE(pq); i++) {
+ pq[i] = alloc_page(GFP_KERNEL);
+ if (!pq[i]) {
+ while (i--)
+ __free_page(pq[i]);
+ return -ENOMEM;
+ }
+ }
+
+ /* Fill in src buffers */
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
+ pq_sw[i] = page_address(pq[i]);
+ memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
+ }
+ pq_sw[i] = page_address(pq[i]);
+ pq_sw[i+1] = page_address(pq[i+1]);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dev = dma_chan->device->dev;
+
+ /* initialize the dests */
+ memset(page_address(pq_hw[0]), 0 , PAGE_SIZE);
+ memset(page_address(pq_hw[1]), 0 , PAGE_SIZE);
+
+ /* test pq */
+ pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
+ IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
+ PAGE_SIZE,
+ DMA_PREP_INTERRUPT |
+ DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_err(dev, "Self-test pq timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
+
+ if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
+ page_address(pq_hw[0]), PAGE_SIZE) != 0) {
+ dev_err(dev, "Self-test p failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
+ page_address(pq_hw[1]), PAGE_SIZE) != 0) {
+ dev_err(dev, "Self-test q failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* test correct zero sum using the software generated pq values */
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ zero_sum_result = ~0;
+ tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+ pq_src, IOP_ADMA_NUM_SRC_TEST,
+ raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+ DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != 0) {
+ dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
+ zero_sum_result);
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* test incorrect zero sum */
+ i = IOP_ADMA_NUM_SRC_TEST;
+ memset(pq_sw[i] + 100, 0, 100);
+ memset(pq_sw[i+1] + 200, 0, 200);
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ zero_sum_result = 0;
+ tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+ pq_src, IOP_ADMA_NUM_SRC_TEST,
+ raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+ DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
+ dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
+ zero_sum_result);
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ iop_adma_free_chan_resources(dma_chan);
+out:
+ i = ARRAY_SIZE(pq);
+ while (i--)
+ __free_page(pq[i]);
+ return err;
+}
+#endif
+
static int __devexit iop_adma_remove(struct platform_device *dev)
{
struct iop_adma_device *device = platform_get_drvdata(dev);
@@ -1192,9 +1520,16 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
dma_dev->max_xor = iop_adma_get_max_xor();
dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
}
- if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
- dma_dev->device_prep_dma_zero_sum =
- iop_adma_prep_dma_zero_sum;
+ if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_xor_val =
+ iop_adma_prep_dma_xor_val;
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
+ dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
+ }
+ if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_pq_val =
+ iop_adma_prep_dma_pq_val;
if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
dma_dev->device_prep_dma_interrupt =
iop_adma_prep_dma_interrupt;
@@ -1248,23 +1583,35 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
}
if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
- dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
- ret = iop_adma_xor_zero_sum_self_test(adev);
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+ ret = iop_adma_xor_val_self_test(adev);
dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
if (ret)
goto err_free_iop_chan;
}
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
+ dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
+ #ifdef CONFIG_MD_RAID6_PQ
+ ret = iop_adma_pq_zero_sum_self_test(adev);
+ dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
+ #else
+ /* can not test raid6, so do not publish capability */
+ dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
+ dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
+ ret = 0;
+ #endif
+ if (ret)
+ goto err_free_iop_chan;
+ }
+
dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
- "( %s%s%s%s%s%s%s%s%s%s)\n",
- dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
- dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
- dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
+ "( %s%s%s%s%s%s%s)\n",
+ dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
+ dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
- dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
- dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
+ dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
- dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
@@ -1296,7 +1643,7 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
if (sw_desc) {
grp_start = sw_desc->group_head;
- list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+ list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
async_tx_ack(&sw_desc->async_tx);
iop_desc_init_memcpy(grp_start, 0);
iop_desc_set_byte_count(grp_start, iop_chan, 0);
@@ -1352,7 +1699,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
grp_start = sw_desc->group_head;
- list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+ list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
async_tx_ack(&sw_desc->async_tx);
iop_desc_init_null_xor(grp_start, 2, 0);
iop_desc_set_byte_count(grp_start, iop_chan, 0);
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
index 9f6fe46a9b8..c0a272c7368 100644
--- a/drivers/dma/iovlock.c
+++ b/drivers/dma/iovlock.c
@@ -183,6 +183,11 @@ dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
iov_byte_offset,
kdata,
copy);
+ /* poll for a descriptor slot */
+ if (unlikely(dma_cookie < 0)) {
+ dma_async_issue_pending(chan);
+ continue;
+ }
len -= copy;
iov[iovec_idx].iov_len -= copy;
@@ -248,6 +253,11 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
page,
offset,
copy);
+ /* poll for a descriptor slot */
+ if (unlikely(dma_cookie < 0)) {
+ dma_async_issue_pending(chan);
+ continue;
+ }
len -= copy;
iov[iovec_idx].iov_len -= copy;
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 3f23eabe09f..466ab10c1ff 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -517,7 +517,7 @@ retry:
}
alloc_tail->group_head = alloc_start;
alloc_tail->async_tx.cookie = -EBUSY;
- list_splice(&chain, &alloc_tail->async_tx.tx_list);
+ list_splice(&chain, &alloc_tail->tx_list);
mv_chan->last_used = last_used;
mv_desc_clear_next_desc(alloc_start);
mv_desc_clear_next_desc(alloc_tail);
@@ -565,14 +565,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
if (list_empty(&mv_chan->chain))
- list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
+ list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
else {
new_hw_chain = 0;
old_chain_tail = list_entry(mv_chan->chain.prev,
struct mv_xor_desc_slot,
chain_node);
- list_splice_init(&grp_start->async_tx.tx_list,
+ list_splice_init(&grp_start->tx_list,
&old_chain_tail->chain_node);
if (!mv_can_chain(grp_start))
@@ -632,6 +632,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
slot->async_tx.tx_submit = mv_xor_tx_submit;
INIT_LIST_HEAD(&slot->chain_node);
INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->tx_list);
hw_desc = (char *) mv_chan->device->dma_desc_pool;
slot->async_tx.phys =
(dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index 06cafe1ef52..977b592e976 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -126,9 +126,8 @@ struct mv_xor_chan {
* @idx: pool index
* @unmap_src_cnt: number of xor sources
* @unmap_len: transaction bytecount
+ * @tx_list: list of slots that make up a multi-descriptor transaction
* @async_tx: support for the async_tx api
- * @group_list: list of slots that make up a multi-descriptor transaction
- * for example transfer lengths larger than the supported hw max
* @xor_check_result: result of zero sum
* @crc32_result: result crc calculation
*/
@@ -145,6 +144,7 @@ struct mv_xor_desc_slot {
u16 unmap_src_cnt;
u32 value;
size_t unmap_len;
+ struct list_head tx_list;
struct dma_async_tx_descriptor async_tx;
union {
u32 *xor_check_result;
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
new file mode 100644
index 00000000000..b3b065c4e5c
--- /dev/null
+++ b/drivers/dma/shdma.c
@@ -0,0 +1,786 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Based on drivers/dma/fsldma.c
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * - DMA of SuperH does not have Hardware DMA chain mode.
+ * - MAX DMA size is 16MB.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/platform_device.h>
+#include <cpu/dma.h>
+#include <asm/dma-sh.h>
+#include "shdma.h"
+
+/* DMA descriptor control */
+#define DESC_LAST (-1)
+#define DESC_COMP (1)
+#define DESC_NCOMP (0)
+
+#define NR_DESCS_PER_CHANNEL 32
+/*
+ * Define the default configuration for dual address memory-memory transfer.
+ * The 0x400 value represents auto-request, external->external.
+ *
+ * This driver sets 4-byte burst mode by default.
+ * To change the mode, change the value of RS_DEFAULT
+ * (e.g. 1-byte burst mode -> (RS_DUAL & ~TS_32)).
+ */
+#define RS_DEFAULT (RS_DUAL)
+
+#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id])
+static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
+{
+ ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+}
+
+static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
+{
+ return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+}
+
+static void dmae_init(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = RS_DEFAULT; /* default is DUAL mode */
+ sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+/*
+ * Reset DMA controller
+ *
+ * SH7780 has two DMAOR register
+ */
+static void sh_dmae_ctl_stop(int id)
+{
+ /* read-modify-write of DMAOR: drop NMIF and AE, keep the rest */
+ unsigned short cleared = dmaor_read_reg(id) & ~(DMAOR_NMIF | DMAOR_AE);
+
+ dmaor_write_reg(id, cleared);
+}
+
+static int sh_dmae_rst(int id)
+{
+ unsigned short dmaor;
+
+ sh_dmae_ctl_stop(id); /* clears DMAOR_NMIF and DMAOR_AE */
+ dmaor = dmaor_read_reg(id) | DMAOR_INIT;
+ dmaor_write_reg(id, dmaor);
+ /* either flag still set on read-back: fail with -EINVAL */
+ if (dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF)) {
+ pr_warning("dma-sh: Can't initialize DMAOR.\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int dmae_is_idle(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+ /* DE set while TE is still clear: the channel is working */
+ if ((chcr & CHCR_DE) && !(chcr & CHCR_TE))
+ return -EBUSY;
+ return 0; /* waiting */
+}
+
+static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+ return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT];
+}
+
+static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw)
+{
+ sh_dmae_writel(sh_chan, hw.sar, SAR);
+ sh_dmae_writel(sh_chan, hw.dar, DAR);
+ sh_dmae_writel(sh_chan,
+ (hw.tcr >> calc_xmit_shift(sh_chan)), TCR);
+}
+
+static void dmae_start(struct sh_dmae_chan *sh_chan)
+{
+ const u32 go = CHCR_DE | CHCR_IE;
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+ sh_dmae_writel(sh_chan, chcr | go, CHCR); /* set DE and IE */
+}
+
+static void dmae_halt(struct sh_dmae_chan *sh_chan)
+{
+ const u32 mask = CHCR_DE | CHCR_TE | CHCR_IE;
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+ sh_dmae_writel(sh_chan, chcr & ~mask, CHCR); /* clear DE, TE, IE */
+}
+
+static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
+{
+ /* CHCR must not be rewritten while the channel is still working */
+ int busy = dmae_is_idle(sh_chan);
+
+ if (!busy)
+ sh_dmae_writel(sh_chan, val, CHCR);
+
+ return busy;
+}
+
+#define DMARS1_ADDR 0x04
+#define DMARS2_ADDR 0x08
+#define DMARS_SHIFT 8
+#define DMARS_CHAN_MSK 0x01
+static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
+{
+ u32 addr;
+ int shift = 0;
+ int ret = dmae_is_idle(sh_chan);
+ if (ret)
+ return ret; /* -EBUSY: leave DMARS alone while working */
+
+ if (sh_chan->id & DMARS_CHAN_MSK)
+ shift = DMARS_SHIFT; /* odd channel: val goes in bits 15:8 */
+
+ switch (sh_chan->id) {
+ /* DMARS0 */
+ case 0:
+ case 1:
+ addr = SH_DMARS_BASE;
+ break;
+ /* DMARS1 */
+ case 2:
+ case 3:
+ addr = (SH_DMARS_BASE + DMARS1_ADDR);
+ break;
+ /* DMARS2 */
+ case 4:
+ case 5:
+ addr = (SH_DMARS_BASE + DMARS2_ADDR);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* keep the byte of the other channel that shares this register */
+ ctrl_outw((val << shift) | (ctrl_inw(addr) & (0xFF00 >> shift)),
+ addr);
+
+ return 0;
+}
+
+static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct sh_desc *desc = tx_to_sh_desc(tx);
+ struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan);
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+
+ cookie = sh_chan->common.cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1; /* wrapped: restart at 1 so cookies stay positive */
+
+ /* a descriptor whose cookie is already -EBUSY keeps that value */
+ if (desc->async_tx.cookie != -EBUSY)
+ desc->async_tx.cookie = cookie;
+ sh_chan->common.cookie = desc->async_tx.cookie;
+
+ list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev); /* append to ld_queue */
+
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ return cookie; /* NOTE(review): can differ from the channel cookie stored above - confirm */
+}
+
+static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
+{
+ struct sh_desc *desc, *_desc, *ret = NULL;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) {
+ if (async_tx_test_ack(&desc->async_tx)) {
+ list_del(&desc->node);
+ ret = desc;
+ break;
+ }
+ }
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ return ret;
+}
+
+static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc)
+{
+ if (desc) {
+ spin_lock_bh(&sh_chan->desc_lock);
+
+ list_splice_init(&desc->tx_list, &sh_chan->ld_free);
+ list_add(&desc->node, &sh_chan->ld_free);
+
+ spin_unlock_bh(&sh_chan->desc_lock);
+ }
+}
+
+static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+ struct sh_desc *desc;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) {
+ spin_unlock_bh(&sh_chan->desc_lock);
+ desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL);
+ if (!desc) {
+ spin_lock_bh(&sh_chan->desc_lock);
+ break;
+ }
+ dma_async_tx_descriptor_init(&desc->async_tx,
+ &sh_chan->common);
+ desc->async_tx.tx_submit = sh_dmae_tx_submit;
+ desc->async_tx.flags = DMA_CTRL_ACK;
+ INIT_LIST_HEAD(&desc->tx_list);
+ sh_dmae_put_desc(sh_chan, desc);
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ sh_chan->descs_allocated++;
+ }
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ return sh_chan->descs_allocated;
+}
+
+/*
+ * sh_dmae_free_chan_resources - Free all resources of the channel.
+ */
+static void sh_dmae_free_chan_resources(struct dma_chan *chan)
+{
+ struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+ struct sh_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ BUG_ON(!list_empty(&sh_chan->ld_queue));
+ spin_lock_bh(&sh_chan->desc_lock);
+
+ list_splice_init(&sh_chan->ld_free, &list);
+ sh_chan->descs_allocated = 0;
+
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ list_for_each_entry_safe(desc, _desc, &list, node)
+ kfree(desc);
+}
+
+/*
+ * sh_dmae_prep_memcpy - Build the descriptor chain for one memory copy.
+ *
+ * The copy is cut into chunks of at most SH_DMA_TCR_MAX bytes.  One free
+ * descriptor is taken per chunk and every descriptor, the first one
+ * included, is linked on first->tx_list.  The last descriptor receives the
+ * caller's flags and a cookie of -EBUSY.
+ *
+ * Returns the async_tx of the first descriptor, or NULL when @chan is NULL,
+ * @len is 0, or the free list runs dry.  In the last case every descriptor
+ * taken so far is handed back to the free list.
+ */
+static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
+	struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+	size_t len, unsigned long flags)
+{
+	struct sh_dmae_chan *sh_chan;
+	struct sh_desc *first = NULL, *new;
+	size_t copy_size;
+
+	if (!chan)
+		return NULL;
+
+	if (!len)
+		return NULL;
+
+	sh_chan = to_sh_chan(chan);
+
+	do {
+		/* Take a link descriptor from the channel's free list */
+		new = sh_dmae_get_desc(sh_chan);
+		if (!new) {
+			dev_err(sh_chan->dev,
+					"No free memory for link descriptor\n");
+			goto err_get_desc;
+		}
+
+		copy_size = min(len, (size_t)SH_DMA_TCR_MAX);
+
+		new->hw.sar = dma_src;
+		new->hw.dar = dma_dest;
+		new->hw.tcr = copy_size;
+		if (!first)
+			first = new;
+
+		new->mark = DESC_NCOMP;
+		async_tx_ack(&new->async_tx);
+
+		len -= copy_size;
+		dma_src += copy_size;
+		dma_dest += copy_size;
+		/* Chain the descriptor (the first one too) on first->tx_list */
+		list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	new->async_tx.flags = flags; /* client is in control of this ack */
+	new->async_tx.cookie = -EBUSY; /* Last desc */
+
+	return &first->async_tx;
+
+err_get_desc:
+	/*
+	 * first->node is itself a member of first->tx_list, so splicing that
+	 * list back returns every descriptor taken above.  Do not go through
+	 * sh_dmae_put_desc() here: it would list_add() first->node a second
+	 * time after the splice and corrupt ld_free.
+	 */
+	if (first) {
+		spin_lock_bh(&sh_chan->desc_lock);
+		list_splice_init(&first->tx_list, &sh_chan->ld_free);
+		spin_unlock_bh(&sh_chan->desc_lock);
+	}
+	return NULL;
+
+}
+
+/*
+ * sh_dmae_chan_ld_cleanup - Clean up link descriptors
+ *
+ * Walks the channel's ld_queue from the head under desc_lock.  Every
+ * descriptor that is not marked DESC_NCOMP is moved to ld_free and its
+ * callback, if any, is invoked with the lock released.  The walk stops at
+ * the first descriptor still marked DESC_NCOMP.
+ */
+static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan)
+{
+ struct sh_desc *desc, *_desc;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) {
+ dma_async_tx_callback callback;
+ void *callback_param;
+
+ /* stop at the first descriptor that has not been transferred yet */
+ if (desc->mark == DESC_NCOMP)
+ break;
+
+ /* transferred descriptor: save its callback before recycling it */
+ callback = desc->async_tx.callback;
+ callback_param = desc->async_tx.callback_param;
+
+ /* Hand anything still chained on tx_list back to the free list */
+ list_splice_init(&desc->tx_list, &sh_chan->ld_free);
+
+ dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n",
+ desc);
+
+ list_move(&desc->node, &sh_chan->ld_free); /* off ld_queue, onto ld_free */
+ /*
+  * Run the link descriptor callback function with desc_lock dropped.
+  * NOTE(review): the descriptor is already on ld_free at this point and
+  * the walk resumes from the saved _desc after re-locking; confirm that
+  * nothing can unlink _desc while the lock is released.
+  */
+ if (callback) {
+ spin_unlock_bh(&sh_chan->desc_lock);
+ dev_dbg(sh_chan->dev, "link descriptor %p callback\n",
+ desc);
+ callback(callback_param);
+ spin_lock_bh(&sh_chan->desc_lock);
+ }
+ }
+ spin_unlock_bh(&sh_chan->desc_lock);
+}
+
+/*
+ * Program the hardware with the first queued descriptor that is not yet
+ * marked DESC_COMP and start the channel.
+ *
+ * Nothing is done when dmae_is_idle() returns non-zero (helper defined
+ * outside this view) or when every queued descriptor is already complete.
+ */
+static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
+{
+	struct list_head *head = &sh_chan->ld_queue;
+	struct list_head *pos;
+	struct sh_dmae_regs hw;
+
+	/* DMA work check */
+	if (dmae_is_idle(sh_chan))
+		return;
+
+	/* Skip over descriptors that have already been transferred */
+	pos = head->next;
+	while (pos != head && to_sh_desc(pos)->mark == DESC_COMP) {
+		cpu_relax();
+		pos = pos->next;
+	}
+
+	if (pos == head)
+		return;
+
+	/* Load the register image kept in the descriptor and go */
+	hw = to_sh_desc(pos)->hw;
+	dmae_set_reg(sh_chan, hw);
+	dmae_start(sh_chan);
+}
+
+/* dmaengine issue_pending hook: kick the channel's descriptor queue. */
+static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
+{
+	sh_chan_xfer_ld_queue(to_sh_chan(chan));
+}
+
+/*
+ * Report the completion state of @cookie on @chan.
+ *
+ * Completed descriptors are recycled first.  A completed_cookie of -EBUSY
+ * is reported as the last used cookie.  @done and @used, when non-NULL,
+ * receive the last completed and last used cookie respectively.
+ */
+static enum dma_status sh_dmae_is_complete(struct dma_chan *chan,
+					dma_cookie_t cookie,
+					dma_cookie_t *done,
+					dma_cookie_t *used)
+{
+	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+	dma_cookie_t newest;
+	dma_cookie_t finished;
+
+	sh_dmae_chan_ld_cleanup(sh_chan);
+
+	newest = chan->cookie;
+	finished = sh_chan->completed_cookie;
+	if (finished == -EBUSY)
+		finished = newest;
+
+	if (done)
+		*done = finished;
+	if (used)
+		*used = newest;
+
+	return dma_async_is_complete(cookie, finished, newest);
+}
+
+/*
+ * Per-channel interrupt handler.
+ *
+ * When CHCR has the TE bit set the channel is halted and the tasklet is
+ * scheduled; otherwise the interrupt is reported as not ours.
+ */
+static irqreturn_t sh_dmae_interrupt(int irq, void *data)
+{
+	struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
+	u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+	if (!(chcr & CHCR_TE))
+		return IRQ_NONE;
+
+	/* DMA stop */
+	dmae_halt(sh_chan);
+	tasklet_schedule(&sh_chan->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+#if defined(CONFIG_CPU_SH4)
+/*
+ * DMAC error interrupt handler; @data is the struct sh_dmae_device.
+ *
+ * Mix-IRQ mode: the line is shared with the transfer-end interrupt, so the
+ * interrupt is only claimed (and the line disabled) when DMAOR reports
+ * NMIF or AE; anything else returns IRQ_NONE.
+ *
+ * Dedicated error IRQ: the controller(s) are reset, the line is disabled
+ * and IRQ_HANDLED is returned.  A failed reset is logged; the handler never
+ * returns an errno, because that is not a valid irqreturn_t.
+ *
+ * NOTE(review): disable_irq() is called from the handler of that same IRQ;
+ * confirm whether disable_irq_nosync() is needed on SMP configurations.
+ */
+static irqreturn_t sh_dmae_err(int irq, void *data)
+{
+	int err;
+	struct sh_dmae_device *shdev = (struct sh_dmae_device *)data;
+
+	/* IRQ Multi */
+	if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+		int cnt = 0;
+		switch (irq) {
+#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
+		case DMTE6_IRQ:
+			cnt++;
+			/* fall through */
+#endif
+		case DMTE0_IRQ:
+			if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) {
+				disable_irq(irq);
+				return IRQ_HANDLED;
+			}
+			/* fall through: no NMIF/AE flagged, not an error irq */
+		default:
+			return IRQ_NONE;
+		}
+	}
+
+	/* reset dma controller */
+	err = sh_dmae_rst(0);
+	if (!err && (shdev->pdata.mode & SHDMA_DMAOR1))
+		err = sh_dmae_rst(1);
+	if (err)
+		dev_err(shdev->common.dev,
+			"DMAC reset after error failed (%d)\n", err);
+
+	/* the error was raised by this device: claim it and silence the line */
+	disable_irq(irq);
+	return IRQ_HANDLED;
+}
+#endif
+
+/*
+ * Channel tasklet, scheduled by sh_dmae_interrupt().
+ *
+ * The descriptor that just finished is identified by comparing the SAR
+ * register with each queued descriptor's sar + tcr.  Its cookie decides
+ * how completed_cookie is updated:
+ *   0       - leave completed_cookie alone
+ *   -EBUSY  - completed_cookie takes the cookie value itself
+ *   other   - completed_cookie becomes cookie - 1
+ * The descriptor is then marked DESC_COMP, the next transfer is started
+ * and finished descriptors are recycled.
+ */
+static void dmae_do_tasklet(unsigned long data)
+{
+	struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
+	struct sh_desc *pos, *finished = NULL;
+	u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
+
+	list_for_each_entry(pos, &sh_chan->ld_queue, node) {
+		if (pos->hw.sar + pos->hw.tcr != sar_buf)
+			continue;
+		finished = pos;
+		break;
+	}
+
+	if (finished) {
+		dma_cookie_t cookie = finished->async_tx.cookie;
+
+		if (cookie == -EBUSY)		/* last desc */
+			sh_chan->completed_cookie = cookie;
+		else if (cookie != 0)		/* first desc */
+			sh_chan->completed_cookie = cookie - 1;
+		/* cookie == 0: other desc data, nothing to record */
+
+		finished->mark = DESC_COMP;
+	}
+
+	/* Next desc */
+	sh_chan_xfer_ld_queue(sh_chan);
+	sh_dmae_chan_ld_cleanup(sh_chan);
+}
+
+/* Look up the IRQ of channel @id in dmte_irq_map; 0 when @id is out of range. */
+static unsigned int get_dmae_irq(unsigned int id)
+{
+	if (id >= ARRAY_SIZE(dmte_irq_map))
+		return 0;
+
+	return dmte_irq_map[id];
+}
+
+/*
+ * Create channel @id of @shdev: allocate it, initialise its tasklet, lock
+ * and descriptor lists, link it into the dmaengine device and request its
+ * interrupt.
+ *
+ * In SHDMA_MIX_IRQ mode the channel uses the shared DMTE0_IRQ line (or
+ * DMTE6_IRQ when that is defined and the channel's own IRQ is not below
+ * it).  Returns 0 on success, -ENOMEM when the channel cannot be allocated,
+ * or the request_irq() error; on an IRQ failure the channel is unlinked and
+ * freed again.
+ */
+static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id)
+{
+	struct sh_dmae_chan *sh_chan;
+	unsigned long irqflags = IRQF_DISABLED;
+	unsigned int irq = get_dmae_irq(id);
+	int err;
+
+	/* alloc channel */
+	sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
+	if (!sh_chan) {
+		dev_err(shdev->common.dev, "No free memory for allocating "
+				"dma channels!\n");
+		return -ENOMEM;
+	}
+
+	sh_chan->dev = shdev->common.dev;
+	sh_chan->id = id;
+
+	/* Init DMA tasklet */
+	tasklet_init(&sh_chan->tasklet, dmae_do_tasklet,
+			(unsigned long)sh_chan);
+
+	/* Init the channel */
+	dmae_init(sh_chan);
+
+	spin_lock_init(&sh_chan->desc_lock);
+
+	/* Init descriptor management lists */
+	INIT_LIST_HEAD(&sh_chan->ld_queue);
+	INIT_LIST_HEAD(&sh_chan->ld_free);
+
+	/* Point the channel at its dma_device and link it into that device */
+	sh_chan->common.device = &shdev->common;
+	list_add_tail(&sh_chan->common.device_node,
+			&shdev->common.channels);
+	shdev->common.chancnt++;
+
+	if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+		irqflags = IRQF_SHARED;
+#if defined(DMTE6_IRQ)
+		irq = (irq >= DMTE6_IRQ) ? DMTE6_IRQ : DMTE0_IRQ;
+#else
+		irq = DMTE0_IRQ;
+#endif
+	}
+
+	snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id),
+			"sh-dmae%d", sh_chan->id);
+
+	/* set up channel irq */
+	err = request_irq(irq, &sh_dmae_interrupt,
+				irqflags, sh_chan->dev_id, sh_chan);
+	if (err) {
+		dev_err(shdev->common.dev, "DMA channel %d request_irq error "
+			"with return %d\n", id, err);
+		/* remove from dmaengine device node */
+		list_del(&sh_chan->common.device_node);
+		kfree(sh_chan);
+		return err;
+	}
+
+	/* CHCR register control function */
+	sh_chan->set_chcr = dmae_set_chcr;
+	/* DMARS register control function */
+	sh_chan->set_dmars = dmae_set_dmars;
+
+	shdev->chan[id] = sh_chan;
+	return 0;
+}
+
+static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
+{
+ int i;
+
+ for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) {
+ if (shdev->chan[i]) {
+ struct sh_dmae_chan *shchan = shdev->chan[i];
+ if (!(shdev->pdata.mode & SHDMA_MIX_IRQ))
+ free_irq(dmte_irq_map[i], shchan);
+
+ list_del(&shchan->common.device_node);
+ kfree(shchan);
+ shdev->chan[i] = NULL;
+ }
+ }
+ shdev->common.chancnt = 0;
+}
+
+/*
+ * Platform probe: copy the platform data, reset the controller(s), fill in
+ * the dmaengine callbacks, request the error interrupts (CONFIG_CPU_SH4
+ * only), create all channels and register with the dmaengine core.
+ *
+ * Returns 0 on success.  Fails with -ENODEV when no platform data is
+ * attached, -ENOMEM when the device structure cannot be allocated, or the
+ * error of whichever step failed; everything set up before the failure is
+ * undone.
+ */
+static int __init sh_dmae_probe(struct platform_device *pdev)
+{
+	int err = 0, cnt;
+#if defined(CONFIG_CPU_SH4)
+	int ecnt;
+	unsigned long irqflags = IRQF_DISABLED;
+	int eirq[] = { DMAE0_IRQ,
+#if defined(DMAE1_IRQ)
+			DMAE1_IRQ
+#endif
+		};
+#endif
+	struct sh_dmae_device *shdev;
+
+	/* get platform data; without it the device cannot be configured */
+	if (!pdev->dev.platform_data)
+		return -ENODEV;
+
+	shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
+	if (!shdev) {
+		dev_err(&pdev->dev, "No enough memory\n");
+		return -ENOMEM;
+	}
+
+	/* platform data */
+	memcpy(&shdev->pdata, pdev->dev.platform_data,
+			sizeof(struct sh_dmae_pdata));
+
+	/* reset dma controller */
+	err = sh_dmae_rst(0);
+	if (err)
+		goto rst_err;
+
+	/* SH7780/85/23 has DMAOR1 */
+	if (shdev->pdata.mode & SHDMA_DMAOR1) {
+		err = sh_dmae_rst(1);
+		if (err)
+			goto rst_err;
+	}
+
+	INIT_LIST_HEAD(&shdev->common.channels);
+
+	dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
+	shdev->common.device_alloc_chan_resources
+		= sh_dmae_alloc_chan_resources;
+	shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources;
+	shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy;
+	shdev->common.device_is_tx_complete = sh_dmae_is_complete;
+	shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending;
+	shdev->common.dev = &pdev->dev;
+
+#if defined(CONFIG_CPU_SH4)
+	/*
+	 * Mix IRQ mode: error events are taken from the DMTE0 (and DMTE6)
+	 * lines, which are then requested as shared interrupts.
+	 */
+	if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+		irqflags = IRQF_SHARED;
+		eirq[0] = DMTE0_IRQ;
+#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
+		eirq[1] = DMTE6_IRQ;
+#endif
+	}
+
+	for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) {
+		err = request_irq(eirq[ecnt], sh_dmae_err,
+			irqflags, "DMAC Address Error", shdev);
+		if (err) {
+			dev_err(&pdev->dev, "DMA device request_irq "
+				"error (irq %d) with return %d\n",
+				eirq[ecnt], err);
+			goto eirq_err;
+		}
+	}
+#endif /* CONFIG_CPU_SH4 */
+
+	/* Create DMA Channel */
+	for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) {
+		err = sh_dmae_chan_probe(shdev, cnt);
+		if (err)
+			goto chan_probe_err;
+	}
+
+	platform_set_drvdata(pdev, shdev);
+	err = dma_async_device_register(&shdev->common);
+	if (err) {
+		platform_set_drvdata(pdev, NULL);
+		goto chan_probe_err;
+	}
+
+	return 0;
+
+chan_probe_err:
+	sh_dmae_chan_remove(shdev);
+
+#if defined(CONFIG_CPU_SH4)
+eirq_err:
+	/* ecnt counts the error irqs requested so far; release them all */
+	for (ecnt-- ; ecnt >= 0; ecnt--)
+		free_irq(eirq[ecnt], shdev);
+#endif
+
+rst_err:
+	kfree(shdev);
+	return err;
+}
+
+/*
+ * Platform remove: unregister from the dmaengine core, release the error
+ * interrupts, tear down the channels and free the device structure.
+ *
+ * The error-IRQ table is rebuilt exactly as sh_dmae_probe() builds it (same
+ * CONFIG_CPU_SH4 guard and same DMTE/DMAE substitutions), so that only
+ * interrupts that were actually requested with this dev_id are freed.
+ */
+static int __exit sh_dmae_remove(struct platform_device *pdev)
+{
+	struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+#if defined(CONFIG_CPU_SH4)
+	int ecnt;
+	int eirq[] = { DMAE0_IRQ,
+#if defined(DMAE1_IRQ)
+			DMAE1_IRQ
+#endif
+		};
+#endif
+
+	dma_async_device_unregister(&shdev->common);
+
+#if defined(CONFIG_CPU_SH4)
+	if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+		eirq[0] = DMTE0_IRQ;
+#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
+		eirq[1] = DMTE6_IRQ;
+#endif
+	}
+
+	for (ecnt = ARRAY_SIZE(eirq) - 1; ecnt >= 0; ecnt--)
+		free_irq(eirq[ecnt], shdev);
+#endif /* CONFIG_CPU_SH4 */
+
+	/* channel data remove */
+	sh_dmae_chan_remove(shdev);
+
+	kfree(shdev);
+
+	return 0;
+}
+
+/*
+ * Platform shutdown hook: call sh_dmae_ctl_stop() for controller 0 and,
+ * when the platform data has SHDMA_DMAOR1 set, for controller 1 as well.
+ */
+static void sh_dmae_shutdown(struct platform_device *pdev)
+{
+	struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+
+	sh_dmae_ctl_stop(0);
+
+	if (!(shdev->pdata.mode & SHDMA_DMAOR1))
+		return;
+
+	sh_dmae_ctl_stop(1);
+}
+
+static struct platform_driver sh_dmae_driver = { /* no .probe: sh_dmae_init() hands sh_dmae_probe to platform_driver_probe() */
+ .remove = __exit_p(sh_dmae_remove),
+ .shutdown = sh_dmae_shutdown,
+ .driver = {
+ .name = "sh-dma-engine", /* name under which the platform driver is registered */
+ },
+};
+
+/* Module init: register the platform driver, probing with sh_dmae_probe(). */
+static int __init sh_dmae_init(void)
+{
+	int ret;
+
+	ret = platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
+	return ret;
+}
+module_init(sh_dmae_init);
+
+static void __exit sh_dmae_exit(void) /* module exit: undo sh_dmae_init() */
+{
+ platform_driver_unregister(&sh_dmae_driver);
+}
+module_exit(sh_dmae_exit);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
+MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h
new file mode 100644
index 00000000000..2b4bc15a2c0
--- /dev/null
+++ b/drivers/dma/shdma.h
@@ -0,0 +1,64 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_SHDMA_H
+#define __DMA_SHDMA_H
+
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+
+#define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */
+
+struct sh_dmae_regs {
+ u32 sar; /* SAR / source address */
+ u32 dar; /* DAR / destination address */
+ u32 tcr; /* TCR / transfer count */
+};
+
+struct sh_desc { /* one software descriptor = one hardware transfer chunk */
+ struct list_head tx_list; /* on the first descriptor: chain of all descriptors of one transfer */
+ struct sh_dmae_regs hw; /* register image loaded by sh_chan_xfer_ld_queue() */
+ struct list_head node; /* link on a tx_list, on ld_queue or on ld_free */
+ struct dma_async_tx_descriptor async_tx; /* dmaengine descriptor (cookie, flags, callback) */
+ int mark; /* DESC_NCOMP until transferred, then DESC_COMP */
+};
+
+struct sh_dmae_chan {
+ dma_cookie_t completed_cookie; /* The maximum cookie completed; updated by the tasklet */
+ spinlock_t desc_lock; /* Descriptor operation lock, taken with spin_lock_bh() */
+ struct list_head ld_queue; /* Link descriptors queue (submitted transfers) */
+ struct list_head ld_free; /* Link descriptors free (available for reuse) */
+ struct dma_chan common; /* DMA common channel */
+ struct device *dev; /* Channel device, used for dev_err()/dev_dbg() */
+ struct tasklet_struct tasklet; /* Tasklet scheduled from the channel interrupt */
+ int descs_allocated; /* desc count */
+ int id; /* Raw id of this channel */
+ char dev_id[16]; /* unique name per DMAC of channel, passed to request_irq() */
+
+ /* Set chcr */
+ int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs);
+ /* Set DMA resource */
+ int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res);
+};
+
+struct sh_dmae_device {
+ struct dma_device common; /* dmaengine device registered by sh_dmae_probe() */
+ struct sh_dmae_chan *chan[MAX_DMA_CHANNELS]; /* channels indexed by id; NULL when absent */
+ struct sh_dmae_pdata pdata; /* private copy of the platform data */
+};
+
+#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common)
+#define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
+#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
+
+#endif /* __DMA_SHDMA_H */
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index 7837930146a..fb6bb64e886 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -180,9 +180,8 @@ static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc)
static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc)
{
- if (!list_empty(&desc->txd.tx_list))
- desc = list_entry(desc->txd.tx_list.prev,
- struct txx9dmac_desc, desc_node);
+ if (!list_empty(&desc->tx_list))
+ desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node);
return desc;
}
@@ -197,6 +196,7 @@ static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc,
desc = kzalloc(sizeof(*desc), flags);
if (!desc)
return NULL;
+ INIT_LIST_HEAD(&desc->tx_list);
dma_async_tx_descriptor_init(&desc->txd, &dc->chan);
desc->txd.tx_submit = txx9dmac_tx_submit;
/* txd.flags will be overwritten in prep funcs */
@@ -245,7 +245,7 @@ static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc,
struct txx9dmac_dev *ddev = dc->ddev;
struct txx9dmac_desc *child;
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
dma_sync_single_for_cpu(chan2parent(&dc->chan),
child->txd.phys, ddev->descsize,
DMA_TO_DEVICE);
@@ -267,11 +267,11 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
txx9dmac_sync_desc_for_cpu(dc, desc);
spin_lock_bh(&dc->lock);
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
dev_vdbg(chan2dev(&dc->chan),
"moving child desc %p to freelist\n",
child);
- list_splice_init(&desc->txd.tx_list, &dc->free_list);
+ list_splice_init(&desc->tx_list, &dc->free_list);
dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n",
desc);
list_add(&desc->desc_node, &dc->free_list);
@@ -429,7 +429,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
param = txd->callback_param;
txx9dmac_sync_desc_for_cpu(dc, desc);
- list_splice_init(&txd->tx_list, &dc->free_list);
+ list_splice_init(&desc->tx_list, &dc->free_list);
list_move(&desc->desc_node, &dc->free_list);
if (!ds) {
@@ -571,7 +571,7 @@ static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr)
"Bad descriptor submitted for DMA! (cookie: %d)\n",
bad_desc->txd.cookie);
txx9dmac_dump_desc(dc, &bad_desc->hwdesc);
- list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
txx9dmac_dump_desc(dc, &child->hwdesc);
/* Pretend the descriptor completed successfully */
txx9dmac_descriptor_complete(dc, bad_desc);
@@ -613,7 +613,7 @@ static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc)
return;
}
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
if (desc_read_CHAR(dc, child) == chain) {
/* Currently in progress */
if (csr & TXX9_DMA_CSR_ABCHC)
@@ -823,8 +823,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
dma_sync_single_for_device(chan2parent(&dc->chan),
prev->txd.phys, ddev->descsize,
DMA_TO_DEVICE);
- list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ list_add_tail(&desc->desc_node, &first->tx_list);
}
prev = desc;
}
@@ -919,8 +918,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
prev->txd.phys,
ddev->descsize,
DMA_TO_DEVICE);
- list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ list_add_tail(&desc->desc_node, &first->tx_list);
}
prev = desc;
}
diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h
index c907ff01d27..365d42366b9 100644
--- a/drivers/dma/txx9dmac.h
+++ b/drivers/dma/txx9dmac.h
@@ -231,6 +231,7 @@ struct txx9dmac_desc {
/* THEN values for driver housekeeping */
struct list_head desc_node ____cacheline_aligned;
+ struct list_head tx_list;
struct dma_async_tx_descriptor txd;
size_t len;
};
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index a3ca18e2d7c..02127e59fe8 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -133,6 +133,13 @@ config EDAC_I3000
Support for error detection and correction on the Intel
3000 and 3010 server chipsets.
+config EDAC_I3200
+ tristate "Intel 3200"
+ depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL
+ help
+ Support for error detection and correction on the Intel
+ 3200 and 3210 server chipsets.
+
config EDAC_X38
tristate "Intel X38"
depends on EDAC_MM_EDAC && PCI && X86
@@ -176,11 +183,11 @@ config EDAC_I5100
San Clemente MCH.
config EDAC_MPC85XX
- tristate "Freescale MPC85xx"
- depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx
+ tristate "Freescale MPC83xx / MPC85xx"
+ depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || MPC85xx)
help
Support for error detection and correction on the Freescale
- MPC8560, MPC8540, MPC8548
+ MPC8349, MPC8560, MPC8540, MPC8548
config EDAC_MV64X60
tristate "Marvell MV64x60"
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index cfa033ce53a..7a473bbe8ab 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o
obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o
obj-$(CONFIG_EDAC_I3000) += i3000_edac.o
+obj-$(CONFIG_EDAC_I3200) += i3200_edac.o
obj-$(CONFIG_EDAC_X38) += x38_edac.o
obj-$(CONFIG_EDAC_I82860) += i82860_edac.o
obj-$(CONFIG_EDAC_R82600) += r82600_edac.o
@@ -49,3 +50,4 @@ obj-$(CONFIG_EDAC_CELL) += cell_edac.o
obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o
+
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index 8c54196b5ab..3d50274f134 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -885,14 +885,14 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdev->name)) {
cpc925_printk(KERN_ERR, "Unable to request mem region\n");
res = -EBUSY;
goto err1;
}
- vbase = devm_ioremap(&pdev->dev, r->start, r->end - r->start + 1);
+ vbase = devm_ioremap(&pdev->dev, r->start, resource_size(r));
if (!vbase) {
cpc925_printk(KERN_ERR, "Unable to ioremap device\n");
res = -ENOMEM;
@@ -953,7 +953,7 @@ err3:
cpc925_mc_exit(mci);
edac_mc_free(mci);
err2:
- devm_release_mem_region(&pdev->dev, r->start, r->end-r->start+1);
+ devm_release_mem_region(&pdev->dev, r->start, resource_size(r));
err1:
devres_release_group(&pdev->dev, cpc925_probe);
out:
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index b02a6a69a8f..d5e13c94714 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -356,7 +356,6 @@ static void complete_edac_device_list_del(struct rcu_head *head)
edac_dev = container_of(head, struct edac_device_ctl_info, rcu);
INIT_LIST_HEAD(&edac_dev->link);
- complete(&edac_dev->removal_complete);
}
/*
@@ -369,10 +368,8 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info
*edac_device)
{
list_del_rcu(&edac_device->link);
-
- init_completion(&edac_device->removal_complete);
call_rcu(&edac_device->rcu, complete_edac_device_list_del);
- wait_for_completion(&edac_device->removal_complete);
+ rcu_barrier();
}
/*
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 335b7ebdb11..b629c41756f 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -418,16 +418,14 @@ static void complete_mc_list_del(struct rcu_head *head)
mci = container_of(head, struct mem_ctl_info, rcu);
INIT_LIST_HEAD(&mci->link);
- complete(&mci->complete);
}
static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
atomic_dec(&edac_handlers);
list_del_rcu(&mci->link);
- init_completion(&mci->complete);
call_rcu(&mci->rcu, complete_mc_list_del);
- wait_for_completion(&mci->complete);
+ rcu_barrier();
}
/**
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 30b585b1d60..efb5d565078 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -174,7 +174,6 @@ static void complete_edac_pci_list_del(struct rcu_head *head)
pci = container_of(head, struct edac_pci_ctl_info, rcu);
INIT_LIST_HEAD(&pci->link);
- complete(&pci->complete);
}
/*
@@ -185,9 +184,8 @@ static void complete_edac_pci_list_del(struct rcu_head *head)
static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci)
{
list_del_rcu(&pci->link);
- init_completion(&pci->complete);
call_rcu(&pci->rcu, complete_edac_pci_list_del);
- wait_for_completion(&pci->complete);
+ rcu_barrier();
}
#if 0
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
new file mode 100644
index 00000000000..fde4db91c4d
--- /dev/null
+++ b/drivers/edac/i3200_edac.c
@@ -0,0 +1,527 @@
+/*
+ * Intel 3200/3210 Memory Controller kernel module
+ * Copyright (C) 2008-2009 Akamai Technologies, Inc.
+ * Portions by Hitoshi Mitake <h.mitake@gmail.com>.
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/edac.h>
+#include <linux/io.h>
+#include "edac_core.h"
+
+#define I3200_REVISION "1.1"
+
+#define EDAC_MOD_STR "i3200_edac"
+
+#define PCI_DEVICE_ID_INTEL_3200_HB 0x29f0
+
+#define I3200_RANKS 8
+#define I3200_RANKS_PER_CHANNEL 4
+#define I3200_CHANNELS 2
+
+/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */
+
+#define I3200_MCHBAR_LOW 0x48 /* MCH Memory Mapped Register BAR */
+#define I3200_MCHBAR_HIGH 0x4c
+#define I3200_MCHBAR_MASK 0xfffffc000ULL /* bits 35:14 */
+#define I3200_MMR_WINDOW_SIZE 16384
+
+#define I3200_TOM 0xa0 /* Top of Memory (16b)
+ *
+ * 15:10 reserved
+ * 9:0 total populated physical memory
+ */
+#define I3200_TOM_MASK 0x3ff /* bits 9:0 */
+#define I3200_TOM_SHIFT 26 /* 64MiB grain */
+
+#define I3200_ERRSTS 0xc8 /* Error Status Register (16b)
+ *
+ * 15 reserved
+ * 14 Isochronous TBWRR Run Behind FIFO Full
+ * (ITCV)
+ * 13 Isochronous TBWRR Run Behind FIFO Put
+ * (ITSTV)
+ * 12 reserved
+ * 11 MCH Thermal Sensor Event
+ * for SMI/SCI/SERR (GTSE)
+ * 10 reserved
+ * 9 LOCK to non-DRAM Memory Flag (LCKF)
+ * 8 reserved
+ * 7 DRAM Throttle Flag (DTF)
+ * 6:2 reserved
+ * 1 Multi-bit DRAM ECC Error Flag (DMERR)
+ * 0 Single-bit DRAM ECC Error Flag (DSERR)
+ */
+#define I3200_ERRSTS_UE 0x0002
+#define I3200_ERRSTS_CE 0x0001
+#define I3200_ERRSTS_BITS (I3200_ERRSTS_UE | I3200_ERRSTS_CE)
+
+
+/* Intel MMIO register space - device 0 function 0 - MMR space */
+
+#define I3200_C0DRB 0x200 /* Channel 0 DRAM Rank Boundary (16b x 4)
+ *
+ * 15:10 reserved
+ * 9:0 Channel 0 DRAM Rank Boundary Address
+ */
+#define I3200_C1DRB 0x600 /* Channel 1 DRAM Rank Boundary (16b x 4) */
+#define I3200_DRB_MASK 0x3ff /* bits 9:0 */
+#define I3200_DRB_SHIFT 26 /* 64MiB grain */
+
+#define I3200_C0ECCERRLOG 0x280 /* Channel 0 ECC Error Log (64b)
+ *
+ * 63:48 Error Column Address (ERRCOL)
+ * 47:32 Error Row Address (ERRROW)
+ * 31:29 Error Bank Address (ERRBANK)
+ * 28:27 Error Rank Address (ERRRANK)
+ * 26:24 reserved
+ * 23:16 Error Syndrome (ERRSYND)
+ * 15: 2 reserved
+ * 1 Multiple Bit Error Status (MERRSTS)
+ * 0 Correctable Error Status (CERRSTS)
+ */
+#define I3200_C1ECCERRLOG 0x680 /* Chan 1 ECC Error Log (64b) */
+#define I3200_ECCERRLOG_CE 0x1
+#define I3200_ECCERRLOG_UE 0x2
+#define I3200_ECCERRLOG_RANK_BITS 0x18000000
+#define I3200_ECCERRLOG_RANK_SHIFT 27
+#define I3200_ECCERRLOG_SYNDROME_BITS 0xff0000
+#define I3200_ECCERRLOG_SYNDROME_SHIFT 16
+#define I3200_CAPID0 0xe0 /* P.95 of spec for details */
+
+struct i3200_priv { /* driver-private data kept in mci->pvt_info */
+ void __iomem *window; /* mapping returned by i3200_map_mchbar() */
+};
+
+static int nr_channels;
+
+/*
+ * Report how many memory channels are in use: 1 when bit 0x20 (DCD, Dual
+ * Channel Disable) of the config byte at I3200_CAPID0 + 8 is set,
+ * 2 otherwise.
+ */
+static int how_many_channels(struct pci_dev *pdev)
+{
+	unsigned char capid0_byte8; /* 8th byte of CAPID0 */
+
+	pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_byte8);
+
+	if (!(capid0_byte8 & 0x20)) {
+		debugf0("In dual channel mode.\n");
+		return 2;
+	}
+
+	debugf0("In single channel mode.\n");
+	return 1;
+}
+
+static unsigned long eccerrlog_syndrome(u64 log)
+{
+ return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
+ I3200_ECCERRLOG_SYNDROME_SHIFT;
+}
+
+/*
+ * Turn an ECC error log value into a csrow index: the rank field of the
+ * log combined with the first row number of @channel.
+ */
+static int eccerrlog_row(int channel, u64 log)
+{
+	u64 rank = log & I3200_ECCERRLOG_RANK_BITS;
+
+	rank >>= I3200_ECCERRLOG_RANK_SHIFT;
+	rank |= channel * I3200_RANKS_PER_CHANNEL;
+	return rank;
+}
+
+enum i3200_chips { /* indices into i3200_devs[] */
+ I3200 = 0,
+};
+
+struct i3200_dev_info {
+ const char *ctl_name; /* copied to mci->ctl_name by i3200_probe1() */
+};
+
+struct i3200_error_info { /* snapshot taken by i3200_get_and_clear_error_info() */
+ u16 errsts; /* ERRSTS as read before the ECC error logs */
+ u16 errsts2; /* ERRSTS read again after the ECC error logs */
+ u64 eccerrlog[I3200_CHANNELS]; /* per-channel ECC error log register value */
+};
+
+static const struct i3200_dev_info i3200_devs[] = { /* indexed by enum i3200_chips */
+ [I3200] = {
+ .ctl_name = "i3200"
+ },
+};
+
+static struct pci_dev *mci_pdev;
+static int i3200_registered = 1;
+
+
+/*
+ * Clear the CE and UE flags in ERRSTS.  The bits are cleared by writing 1
+ * to them.
+ */
+static void i3200_clear_error_info(struct mem_ctl_info *mci)
+{
+	struct pci_dev *pdev = to_pci_dev(mci->dev);
+
+	pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
+		I3200_ERRSTS_BITS);
+}
+
+static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
+ struct i3200_error_info *info)
+{
+ struct pci_dev *pdev;
+ struct i3200_priv *priv = mci->pvt_info;
+ void __iomem *window = priv->window;
+
+ pdev = to_pci_dev(mci->dev);
+
+ /*
+ * This is a mess because there is no atomic way to read all the
+ * registers at once and the registers can transition from CE being
+ * overwritten by UE.
+ */
+ pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
+ if (!(info->errsts & I3200_ERRSTS_BITS))
+ return;
+
+ info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
+ if (nr_channels == 2)
+ info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
+
+ pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);
+
+ /*
+ * If the error is the same for both reads then the first set
+ * of reads is valid. If there is a change then there is a CE
+ * with no info and the second set of reads is valid and
+ * should be UE info.
+ */
+ if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
+ info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
+ if (nr_channels == 2)
+ info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
+ }
+
+ i3200_clear_error_info(mci);
+}
+
+/*
+ * Report the errors captured in @info to the EDAC core.
+ *
+ * A change of the CE/UE bits between the two ERRSTS reads is reported as a
+ * CE without details.  Each channel's log is then reported as a UE or,
+ * failing that, as a CE with syndrome and row.
+ */
+static void i3200_process_error_info(struct mem_ctl_info *mci,
+		struct i3200_error_info *info)
+{
+	int ch;
+
+	if (!(info->errsts & I3200_ERRSTS_BITS))
+		return;
+
+	if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
+		edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+		info->errsts = info->errsts2;
+	}
+
+	for (ch = 0; ch < nr_channels; ch++) {
+		u64 log = info->eccerrlog[ch];
+
+		if (log & I3200_ECCERRLOG_UE) {
+			edac_mc_handle_ue(mci, 0, 0,
+				eccerrlog_row(ch, log),
+				"i3200 UE");
+			continue;
+		}
+
+		if (!(log & I3200_ECCERRLOG_CE))
+			continue;
+
+		edac_mc_handle_ce(mci, 0, 0,
+			eccerrlog_syndrome(log),
+			eccerrlog_row(ch, log), 0,
+			"i3200 CE");
+	}
+}
+
+/* EDAC check hook (mci->edac_check): snapshot, clear and report errors. */
+static void i3200_check(struct mem_ctl_info *mci)
+{
+	struct i3200_error_info snapshot;
+
+	debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+
+	i3200_get_and_clear_error_info(mci, &snapshot);
+	i3200_process_error_info(mci, &snapshot);
+}
+
+
+/*
+ * Map the MCH memory-mapped register window.
+ *
+ * The 64-bit base is assembled from the two MCHBAR config dwords and
+ * masked with I3200_MCHBAR_MASK.  Returns the mapping, or NULL when the
+ * base does not fit in resource_size_t or the ioremap fails.
+ */
+void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
+{
+	union {
+		u64 mchbar;
+		struct {
+			u32 mchbar_low;
+			u32 mchbar_high;
+		};
+	} bar;
+	void __iomem *window;
+
+	pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &bar.mchbar_low);
+	pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &bar.mchbar_high);
+	bar.mchbar &= I3200_MCHBAR_MASK;
+
+	if (bar.mchbar != (resource_size_t)bar.mchbar) {
+		printk(KERN_ERR
+			"i3200: mmio space beyond accessible range (0x%llx)\n",
+			(unsigned long long)bar.mchbar);
+		return NULL;
+	}
+
+	window = ioremap_nocache(bar.mchbar, I3200_MMR_WINDOW_SIZE);
+	if (window)
+		return window;
+
+	printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
+		(unsigned long long)bar.mchbar);
+	return NULL;
+}
+
+
+/*
+ * Read the DRAM rank boundary registers of both channels into @drbs.
+ * Each entry is a 16-bit register masked with I3200_DRB_MASK.
+ */
+static void i3200_get_drbs(void __iomem *window,
+	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
+{
+	int rank;
+
+	for (rank = 0; rank < I3200_RANKS_PER_CHANNEL; rank++) {
+		int offset = 2 * rank;	/* registers are 16 bits apart */
+
+		drbs[0][rank] = readw(window + I3200_C0DRB + offset) &
+				I3200_DRB_MASK;
+		drbs[1][rank] = readw(window + I3200_C1DRB + offset) &
+				I3200_DRB_MASK;
+	}
+}
+
+/*
+ * True when the last rank boundary of the last channel equals the Top of
+ * Memory value read from the I3200_TOM config word.
+ */
+static bool i3200_is_stacked(struct pci_dev *pdev,
+	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
+{
+	u16 top_drb = drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1];
+	u16 tom;
+
+	pci_read_config_word(pdev, I3200_TOM, &tom);
+
+	return top_drb == (tom & I3200_TOM_MASK);
+}
+
+/*
+ * Number of pages in one rank, derived from the cumulative rank boundary
+ * values: this rank's boundary minus the previous rank's.  In stacked mode
+ * the last boundary of channel 0 is also subtracted for every channel 1
+ * rank whose boundary equals channel 1's last boundary.
+ */
+static unsigned long drb_to_nr_pages(
+	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
+	int channel, int rank)
+{
+	const int last = I3200_RANKS_PER_CHANNEL - 1;
+	int units = drbs[channel][rank];
+
+	if (rank > 0)
+		units -= drbs[channel][rank - 1];
+
+	if (stacked && channel == 1 &&
+	    drbs[channel][rank] == drbs[channel][last])
+		units -= drbs[0][last];
+
+	/* DRB units of 1 << I3200_DRB_SHIFT bytes, converted to pages */
+	units <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
+	return units;
+}
+
+/*
+ * Set up one memory controller instance for @pdev.
+ *
+ * Maps the MCHBAR window, reads the rank boundaries and channel count,
+ * allocates the mem_ctl_info, fills in one csrow per populated rank and
+ * registers with the EDAC core.  @dev_idx selects the i3200_devs[] entry.
+ *
+ * Returns 0 on success, -ENODEV when the window cannot be mapped or the
+ * controller cannot be registered, -ENOMEM when the mem_ctl_info cannot be
+ * allocated.  The window mapping is released on every failure after it was
+ * taken.
+ */
+static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
+{
+	int rc;
+	int i;
+	struct mem_ctl_info *mci = NULL;
+	unsigned long last_page;
+	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
+	bool stacked;
+	void __iomem *window;
+	struct i3200_priv *priv;
+
+	debugf0("MC: %s()\n", __func__);
+
+	window = i3200_map_mchbar(pdev);
+	if (!window)
+		return -ENODEV;
+
+	i3200_get_drbs(window, drbs);
+	nr_channels = how_many_channels(pdev);
+
+	mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS,
+		nr_channels, 0);
+	if (!mci) {
+		/* do not leak the MCHBAR mapping taken above */
+		rc = -ENOMEM;
+		goto fail;
+	}
+
+	debugf3("MC: %s(): init mci\n", __func__);
+
+	mci->dev = &pdev->dev;
+	mci->mtype_cap = MEM_FLAG_DDR2;
+
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+
+	mci->mod_name = EDAC_MOD_STR;
+	mci->mod_ver = I3200_REVISION;
+	mci->ctl_name = i3200_devs[dev_idx].ctl_name;
+	mci->dev_name = pci_name(pdev);
+	mci->edac_check = i3200_check;
+	mci->ctl_page_to_phys = NULL;
+	priv = mci->pvt_info;
+	priv->window = window;
+
+	stacked = i3200_is_stacked(pdev, drbs);
+
+	/*
+	 * The dram rank boundary (DRB) reg values are boundary addresses
+	 * for each DRAM rank with a granularity of 64MB.  DRB regs are
+	 * cumulative; the last one will contain the total memory
+	 * contained in all ranks.
+	 */
+	last_page = -1UL;	/* so that the first csrow starts at page 0 */
+	for (i = 0; i < mci->nr_csrows; i++) {
+		unsigned long nr_pages;
+		struct csrow_info *csrow = &mci->csrows[i];
+
+		nr_pages = drb_to_nr_pages(drbs, stacked,
+			i / I3200_RANKS_PER_CHANNEL,
+			i % I3200_RANKS_PER_CHANNEL);
+
+		if (nr_pages == 0) {
+			csrow->mtype = MEM_EMPTY;
+			continue;
+		}
+
+		csrow->first_page = last_page + 1;
+		last_page += nr_pages;
+		csrow->last_page = last_page;
+		csrow->nr_pages = nr_pages;
+
+		csrow->grain = nr_pages << PAGE_SHIFT;
+		csrow->mtype = MEM_DDR2;
+		csrow->dtype = DEV_UNKNOWN;
+		csrow->edac_mode = EDAC_UNKNOWN;
+	}
+
+	i3200_clear_error_info(mci);
+
+	rc = -ENODEV;
+	if (edac_mc_add_mc(mci)) {
+		debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
+		goto fail;
+	}
+
+	/* get this far and it's successful */
+	debugf3("MC: %s(): success\n", __func__);
+	return 0;
+
+fail:
+	iounmap(window);
+	if (mci)
+		edac_mc_free(mci);
+
+	return rc;
+}
+
+/*
+ * PCI probe hook: enable the device and set up the memory controller.
+ *
+ * Returns -EIO when the device cannot be enabled, otherwise the result of
+ * i3200_probe1().  If mci_pdev is still unset after probing, it is set to
+ * a new reference on @pdev.
+ */
+static int __devinit i3200_init_one(struct pci_dev *pdev,
+		const struct pci_device_id *ent)
+{
+	int probe_rc;
+
+	debugf0("MC: %s()\n", __func__);
+
+	if (pci_enable_device(pdev) < 0)
+		return -EIO;
+
+	probe_rc = i3200_probe1(pdev, ent->driver_data);
+
+	if (mci_pdev == NULL)
+		mci_pdev = pci_dev_get(pdev);
+
+	return probe_rc;
+}
+
+/*
+ * PCI remove hook: unregister the memory controller of @pdev, unmap its
+ * register window and free it.  Does nothing when no controller is
+ * registered for the device.
+ */
+static void __devexit i3200_remove_one(struct pci_dev *pdev)
+{
+	struct mem_ctl_info *mci;
+	struct i3200_priv *priv;
+
+	debugf0("%s()\n", __func__);
+
+	mci = edac_mc_del_mc(&pdev->dev);
+	if (mci) {
+		priv = mci->pvt_info;
+		iounmap(priv->window);
+
+		edac_mc_free(mci);
+	}
+}
+
+static const struct pci_device_id i3200_pci_tbl[] __devinitdata = {
+ {
+ PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+ I3200}, /* last field is driver_data: index into i3200_devs[] */
+ {
+ 0,
+ } /* zero entry terminates the list */
+};
+
+MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
+
+static struct pci_driver i3200_driver = { /* registered by i3200_init(), unregistered by i3200_exit() */
+ .name = EDAC_MOD_STR,
+ .probe = i3200_init_one,
+ .remove = __devexit_p(i3200_remove_one),
+ .id_table = i3200_pci_tbl,
+};
+
+/*
+ * Module init: register the PCI driver.  If mci_pdev is still unset after
+ * registration, look the host bridge up by vendor/device id and initialise
+ * it by hand; i3200_registered is cleared in that case so i3200_exit()
+ * knows to undo it.
+ *
+ * Returns 0 on success, the pci_register_driver() error, or -ENODEV when
+ * the device is not found or cannot be initialised.
+ */
+static int __init i3200_init(void)
+{
+	int pci_rc;
+
+	debugf3("MC: %s()\n", __func__);
+
+	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
+	opstate_init();
+
+	pci_rc = pci_register_driver(&i3200_driver);
+	if (pci_rc < 0)
+		goto fail0;
+
+	/* mci_pdev already set (i3200_init_one() does that): nothing more to do */
+	if (mci_pdev)
+		return 0;
+
+	i3200_registered = 0;
+	mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+				PCI_DEVICE_ID_INTEL_3200_HB, NULL);
+	if (!mci_pdev) {
+		debugf0("i3200 pci_get_device fail\n");
+		pci_rc = -ENODEV;
+		goto fail1;
+	}
+
+	if (i3200_init_one(mci_pdev, i3200_pci_tbl) < 0) {
+		debugf0("i3200 init fail\n");
+		pci_rc = -ENODEV;
+		goto fail1;
+	}
+
+	return 0;
+
+fail1:
+	pci_unregister_driver(&i3200_driver);
+
+fail0:
+	if (mci_pdev)
+		pci_dev_put(mci_pdev);
+
+	return pci_rc;
+}
+
+/*
+ * Module exit: unregister the PCI driver.  When i3200_init() initialised
+ * the device by hand (i3200_registered == 0), remove it and drop the
+ * device reference as well.
+ */
+static void __exit i3200_exit(void)
+{
+	debugf3("MC: %s()\n", __func__);
+
+	pci_unregister_driver(&i3200_driver);
+
+	if (i3200_registered)
+		return;
+
+	i3200_remove_one(mci_pdev);
+	pci_dev_put(mci_pdev);
+}
+
+module_init(i3200_init);
+module_exit(i3200_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akamai Technologies, Inc.");
+MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 3f2ccfc6407..157f6504f25 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -41,7 +41,9 @@ static u32 orig_pci_err_en;
#endif
static u32 orig_l2_err_disable;
+#ifdef CONFIG_MPC85xx
static u32 orig_hid1[2];
+#endif
/************************ MC SYSFS parts ***********************************/
@@ -646,6 +648,7 @@ static struct of_device_id mpc85xx_l2_err_of_match[] = {
{ .compatible = "fsl,mpc8560-l2-cache-controller", },
{ .compatible = "fsl,mpc8568-l2-cache-controller", },
{ .compatible = "fsl,mpc8572-l2-cache-controller", },
+ { .compatible = "fsl,p2020-l2-cache-controller", },
{},
};
@@ -788,19 +791,20 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
csrow = &mci->csrows[index];
cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 +
(index * MPC85XX_MC_CS_BNDS_OFS));
- start = (cs_bnds & 0xfff0000) << 4;
- end = ((cs_bnds & 0xfff) << 20);
- if (start)
- start |= 0xfffff;
- if (end)
- end |= 0xfffff;
+
+ start = (cs_bnds & 0xffff0000) >> 16;
+ end = (cs_bnds & 0x0000ffff);
if (start == end)
continue; /* not populated */
+ start <<= (24 - PAGE_SHIFT);
+ end <<= (24 - PAGE_SHIFT);
+ end |= (1 << (24 - PAGE_SHIFT)) - 1;
+
csrow->first_page = start >> PAGE_SHIFT;
csrow->last_page = end >> PAGE_SHIFT;
- csrow->nr_pages = csrow->last_page + 1 - csrow->first_page;
+ csrow->nr_pages = end + 1 - start;
csrow->grain = 8;
csrow->mtype = mtype;
csrow->dtype = DEV_UNKNOWN;
@@ -984,6 +988,8 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = {
{ .compatible = "fsl,mpc8560-memory-controller", },
{ .compatible = "fsl,mpc8568-memory-controller", },
{ .compatible = "fsl,mpc8572-memory-controller", },
+ { .compatible = "fsl,mpc8349-memory-controller", },
+ { .compatible = "fsl,p2020-memory-controller", },
{},
};
@@ -999,13 +1005,13 @@ static struct of_platform_driver mpc85xx_mc_err_driver = {
},
};
-
+#ifdef CONFIG_MPC85xx
static void __init mpc85xx_mc_clear_rfxe(void *data)
{
orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1);
mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~0x20000));
}
-
+#endif
static int __init mpc85xx_mc_init(void)
{
@@ -1038,26 +1044,32 @@ static int __init mpc85xx_mc_init(void)
printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
#endif
+#ifdef CONFIG_MPC85xx
/*
* need to clear HID1[RFXE] to disable machine check int
* so we can catch it
*/
if (edac_op_state == EDAC_OPSTATE_INT)
on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0);
+#endif
return 0;
}
module_init(mpc85xx_mc_init);
+#ifdef CONFIG_MPC85xx
static void __exit mpc85xx_mc_restore_hid1(void *data)
{
mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]);
}
+#endif
static void __exit mpc85xx_mc_exit(void)
{
+#ifdef CONFIG_MPC85xx
on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0);
+#endif
#ifdef CONFIG_PCI
of_unregister_platform_driver(&mpc85xx_pci_err_driver);
#endif
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index 5131aaae8e0..a6b9fec13a7 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -90,7 +90,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev)
return -ENOENT;
}
- pci_serr = ioremap(r->start, r->end - r->start + 1);
+ pci_serr = ioremap(r->start, resource_size(r));
if (!pci_serr)
return -ENOMEM;
@@ -140,7 +140,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
@@ -150,7 +150,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
pdata->pci_vbase = devm_ioremap(&pdev->dev,
r->start,
- r->end - r->start + 1);
+ resource_size(r));
if (!pdata->pci_vbase) {
printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
res = -ENOMEM;
@@ -306,7 +306,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev)
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while request mem region\n",
__func__);
@@ -316,7 +316,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev)
pdata->sram_vbase = devm_ioremap(&pdev->dev,
r->start,
- r->end - r->start + 1);
+ resource_size(r));
if (!pdata->sram_vbase) {
printk(KERN_ERR "%s: Unable to setup SRAM err regs\n",
__func__);
@@ -474,7 +474,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
@@ -484,7 +484,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
pdata->cpu_vbase[0] = devm_ioremap(&pdev->dev,
r->start,
- r->end - r->start + 1);
+ resource_size(r));
if (!pdata->cpu_vbase[0]) {
printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
res = -ENOMEM;
@@ -501,7 +501,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
@@ -511,7 +511,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
pdata->cpu_vbase[1] = devm_ioremap(&pdev->dev,
r->start,
- r->end - r->start + 1);
+ resource_size(r));
if (!pdata->cpu_vbase[1]) {
printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
res = -ENOMEM;
@@ -726,7 +726,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
@@ -736,7 +736,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
pdata->mc_vbase = devm_ioremap(&pdev->dev,
r->start,
- r->end - r->start + 1);
+ resource_size(r));
if (!pdata->mc_vbase) {
printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__);
res = -ENOMEM;
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index e4d971c8b9d..f831ea15929 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -102,6 +102,7 @@ config DRM_I915
select BACKLIGHT_CLASS_DEVICE if ACPI
select INPUT if ACPI
select ACPI_VIDEO if ACPI
+ select ACPI_BUTTON if ACPI
help
Choose this option if you have a system that has Intel 830M, 845G,
852GM, 855GM 865G or 915G integrated graphics. If M is selected, the
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 230c9ffdd5e..80391995bde 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -142,6 +142,19 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size)
if (IS_ERR(obj->filp))
goto free;
+ /* Basically we want to disable the OOM killer and handle ENOMEM
+ * ourselves by sacrificing pages from cached buffers.
+ * XXX shmem_file_[gs]et_gfp_mask()
+ */
+ mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping,
+ GFP_HIGHUSER |
+ __GFP_COLD |
+ __GFP_FS |
+ __GFP_RECLAIMABLE |
+ __GFP_NORETRY |
+ __GFP_NOWARN |
+ __GFP_NOMEMALLOC);
+
kref_init(&obj->refcount);
kref_init(&obj->handlecount);
obj->size = size;
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 5269dfa5f62..fa7b9be096b 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -9,6 +9,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
i915_gem.o \
i915_gem_debug.o \
i915_gem_tiling.o \
+ i915_trace_points.o \
intel_display.o \
intel_crt.o \
intel_lvds.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1e3bdcee863..f8ce9a3a420 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -96,11 +96,13 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
{
struct drm_gem_object *obj = obj_priv->obj;
- seq_printf(m, " %p: %s %08x %08x %d",
+ seq_printf(m, " %p: %s %8zd %08x %08x %d %s",
obj,
get_pin_flag(obj_priv),
+ obj->size,
obj->read_domains, obj->write_domain,
- obj_priv->last_rendering_seqno);
+ obj_priv->last_rendering_seqno,
+ obj_priv->dirty ? "dirty" : "");
if (obj->name)
seq_printf(m, " (name: %d)", obj->name);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5a49a1867b3..45d507ebd3f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -33,6 +33,7 @@
#include "intel_drv.h"
#include "i915_drm.h"
#include "i915_drv.h"
+#include "i915_trace.h"
#include <linux/vgaarb.h>
/* Really want an OS-independent resettable timer. Would like to have
@@ -50,14 +51,18 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
int i;
+ trace_i915_ring_wait_begin (dev);
+
for (i = 0; i < 100000; i++) {
ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
acthd = I915_READ(acthd_reg);
ring->space = ring->head - (ring->tail + 8);
if (ring->space < 0)
ring->space += ring->Size;
- if (ring->space >= n)
+ if (ring->space >= n) {
+ trace_i915_ring_wait_end (dev);
return 0;
+ }
if (dev->primary->master) {
struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
@@ -77,6 +82,7 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
}
+ trace_i915_ring_wait_end (dev);
return -EBUSY;
}
@@ -922,7 +928,8 @@ static int i915_get_bridge_dev(struct drm_device *dev)
* how much was set aside so we can use it for our own purposes.
*/
static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size,
- uint32_t *preallocated_size)
+ uint32_t *preallocated_size,
+ uint32_t *start)
{
struct drm_i915_private *dev_priv = dev->dev_private;
u16 tmp = 0;
@@ -1009,10 +1016,159 @@ static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size,
return -1;
}
*preallocated_size = stolen - overhead;
+ *start = overhead;
return 0;
}
+#define PTE_ADDRESS_MASK 0xfffff000
+#define PTE_ADDRESS_MASK_HIGH 0x000000f0 /* i915+ */
+#define PTE_MAPPING_TYPE_UNCACHED (0 << 1)
+#define PTE_MAPPING_TYPE_DCACHE (1 << 1) /* i830 only */
+#define PTE_MAPPING_TYPE_CACHED (3 << 1)
+#define PTE_MAPPING_TYPE_MASK (3 << 1)
+#define PTE_VALID (1 << 0)
+
+/**
+ * i915_gtt_to_phys - take a GTT address and turn it into a physical one
+ * @dev: drm device
+ * @gtt_addr: address to translate
+ *
+ * Some chip functions require allocations from stolen space but need the
+ * physical address of the memory in question.  We use this routine
+ * to get a physical address suitable for register programming from a given
+ * GTT address.
+ *
+ * The GTT is mapped temporarily, the page table entry covering @gtt_addr
+ * is read, and the mapping is dropped again before the entry is decoded.
+ *
+ * Returns the physical address encoded in the entry, or 0 if the GTT
+ * could not be mapped, the entry has an unknown mapping type, or the
+ * entry is not marked valid.
+ */
+static unsigned long i915_gtt_to_phys(struct drm_device *dev,
+				      unsigned long gtt_addr)
+{
+	u32 *gtt;
+	unsigned long entry, phys;
+	int gtt_bar = IS_I9XX(dev) ? 0 : 1;
+	int gtt_offset, gtt_size;
+
+	if (IS_I965G(dev)) {
+		if (IS_G4X(dev) || IS_IGDNG(dev)) {
+			gtt_offset = 2*1024*1024;
+			gtt_size = 2*1024*1024;
+		} else {
+			gtt_offset = 512*1024;
+			gtt_size = 512*1024;
+		}
+	} else {
+		gtt_bar = 3;
+		gtt_offset = 0;
+		gtt_size = pci_resource_len(dev->pdev, gtt_bar);
+	}
+
+	gtt = ioremap_wc(pci_resource_start(dev->pdev, gtt_bar) + gtt_offset,
+			 gtt_size);
+	if (!gtt) {
+		DRM_ERROR("ioremap of GTT failed\n");
+		return 0;
+	}
+
+	/*
+	 * One 32-bit entry per 4K GTT page.  gtt is a u32 pointer, so index
+	 * it by page number; adding a byte offset to a wider pointer type
+	 * would be scaled by the element size and read the wrong entry.
+	 */
+	entry = *(volatile u32 *)(gtt + (gtt_addr / 4096));
+
+	/* The entry has been copied out, so the mapping can go right away. */
+	iounmap(gtt);
+
+	DRM_DEBUG("GTT addr: 0x%08lx, PTE: 0x%08lx\n", gtt_addr, entry);
+
+	/* Mask out these reserved bits on this hardware. */
+	if (!IS_I9XX(dev) || IS_I915G(dev) || IS_I915GM(dev) ||
+	    IS_I945G(dev) || IS_I945GM(dev)) {
+		entry &= ~PTE_ADDRESS_MASK_HIGH;
+	}
+
+	/* If it's not a mapping type we know, then bail. */
+	if ((entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_UNCACHED &&
+	    (entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_CACHED)
+		return 0;
+
+	if (!(entry & PTE_VALID)) {
+		DRM_ERROR("bad GTT entry in stolen space\n");
+		return 0;
+	}
+
+	/* Low address bits come straight from the PTE, the high nibble is shifted up. */
+	phys = (entry & PTE_ADDRESS_MASK) |
+		((uint64_t)(entry & PTE_ADDRESS_MASK_HIGH) << (32 - 4));
+
+	DRM_DEBUG("GTT addr: 0x%08lx, phys addr: 0x%08lx\n", gtt_addr, phys);
+
+	return phys;
+}
+
+/* Report that stolen memory is too small for the compressed buffer. */
+static void i915_warn_stolen(struct drm_device *dev)
+{
+	DRM_ERROR("not enough stolen space for compressed buffer, disabling\n");
+	DRM_ERROR("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
+}
+
+/**
+ * i915_setup_compression - reserve and program the FBC buffers
+ * @dev: drm device
+ * @size: size in bytes of the compressed framebuffer to reserve
+ *
+ * Allocates the compressed framebuffer from the stolen-memory allocator
+ * (dev_priv->vram) and, on everything except GM45, a 4K line length
+ * buffer as well, then writes their base addresses to the hardware.
+ *
+ * On any failure every block reserved so far is handed back and the
+ * function returns without touching the hardware or dev_priv->cfb_size.
+ */
+static void i915_setup_compression(struct drm_device *dev, int size)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_mm_node *compressed_fb, *compressed_llb;
+	unsigned long cfb_base, ll_base = 0; /* ll_base stays 0 on GM45 */
+
+	/* Compressed framebuffer, 4K aligned. */
+	compressed_fb = drm_mm_search_free(&dev_priv->vram, size, 4096, 0);
+	if (!compressed_fb) {
+		i915_warn_stolen(dev);
+		return;
+	}
+
+	compressed_fb = drm_mm_get_block(compressed_fb, size, 4096);
+	if (!compressed_fb) {
+		i915_warn_stolen(dev);
+		return;
+	}
+
+	cfb_base = i915_gtt_to_phys(dev, compressed_fb->start);
+	if (!cfb_base) {
+		DRM_ERROR("failed to get stolen phys addr, disabling FBC\n");
+		goto err_put_fb;
+	}
+
+	if (!IS_GM45(dev)) {
+		/* Line length buffer: one 4K page, 4K aligned. */
+		compressed_llb = drm_mm_search_free(&dev_priv->vram, 4096,
+						    4096, 0);
+		if (!compressed_llb) {
+			i915_warn_stolen(dev);
+			goto err_put_fb;
+		}
+
+		compressed_llb = drm_mm_get_block(compressed_llb, 4096, 4096);
+		if (!compressed_llb) {
+			i915_warn_stolen(dev);
+			goto err_put_fb;
+		}
+
+		ll_base = i915_gtt_to_phys(dev, compressed_llb->start);
+		if (!ll_base) {
+			DRM_ERROR("failed to get stolen phys addr, disabling FBC\n");
+			drm_mm_put_block(compressed_llb);
+			goto err_put_fb;
+		}
+	}
+
+	dev_priv->cfb_size = size;
+
+	if (IS_GM45(dev)) {
+		g4x_disable_fbc(dev);
+		/* GM45 is given the GTT offset rather than a physical address. */
+		I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
+	} else {
+		i8xx_disable_fbc(dev);
+		I915_WRITE(FBC_CFB_BASE, cfb_base);
+		I915_WRITE(FBC_LL_BASE, ll_base);
+	}
+
+	DRM_DEBUG("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n", cfb_base,
+		  ll_base, size >> 20);
+	return;
+
+err_put_fb:
+	/* Give the compressed framebuffer back; FBC stays unconfigured. */
+	drm_mm_put_block(compressed_fb);
+}
+
/* true = enable decode, false = disable decoder */
static unsigned int i915_vga_set_decode(void *cookie, bool state)
{
@@ -1027,6 +1183,7 @@ static unsigned int i915_vga_set_decode(void *cookie, bool state)
}
static int i915_load_modeset_init(struct drm_device *dev,
+ unsigned long prealloc_start,
unsigned long prealloc_size,
unsigned long agp_size)
{
@@ -1047,6 +1204,10 @@ static int i915_load_modeset_init(struct drm_device *dev,
/* Basic memrange allocator for stolen space (aka vram) */
drm_mm_init(&dev_priv->vram, 0, prealloc_size);
+ DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024));
+
+ /* We're off and running w/KMS */
+ dev_priv->mm.suspended = 0;
/* Let GEM Manage from end of prealloc space to end of aperture.
*
@@ -1059,10 +1220,25 @@ static int i915_load_modeset_init(struct drm_device *dev,
*/
i915_gem_do_init(dev, prealloc_size, agp_size - 4096);
+ mutex_lock(&dev->struct_mutex);
ret = i915_gem_init_ringbuffer(dev);
+ mutex_unlock(&dev->struct_mutex);
if (ret)
goto out;
+ /* Try to set up FBC with a reasonable compressed buffer size */
+ if (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev) || IS_GM45(dev)) &&
+ i915_powersave) {
+ int cfb_size;
+
+ /* Try to get an 8M buffer... */
+ if (prealloc_size > (9*1024*1024))
+ cfb_size = 8*1024*1024;
+ else /* fall back to 7/8 of the stolen space */
+ cfb_size = prealloc_size * 7 / 8;
+ i915_setup_compression(dev, cfb_size);
+ }
+
/* Allow hardware batchbuffers unless told otherwise.
*/
dev_priv->allow_batchbuffer = 1;
@@ -1180,7 +1356,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
struct drm_i915_private *dev_priv = dev->dev_private;
resource_size_t base, size;
int ret = 0, mmio_bar = IS_I9XX(dev) ? 0 : 1;
- uint32_t agp_size, prealloc_size;
+ uint32_t agp_size, prealloc_size, prealloc_start;
/* i915 has 4 more counters */
dev->counters += 4;
@@ -1234,7 +1410,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
"performance may suffer.\n");
}
- ret = i915_probe_agp(dev, &agp_size, &prealloc_size);
+ ret = i915_probe_agp(dev, &agp_size, &prealloc_size, &prealloc_start);
if (ret)
goto out_iomapfree;
@@ -1300,8 +1476,12 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
return ret;
}
+ /* Start out suspended */
+ dev_priv->mm.suspended = 1;
+
if (drm_core_check_feature(dev, DRIVER_MODESET)) {
- ret = i915_load_modeset_init(dev, prealloc_size, agp_size);
+ ret = i915_load_modeset_init(dev, prealloc_start,
+ prealloc_size, agp_size);
if (ret < 0) {
DRM_ERROR("failed to init modeset\n");
goto out_workqueue_free;
@@ -1313,6 +1493,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
if (!IS_IGDNG(dev))
intel_opregion_init(dev, 0);
+ setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed,
+ (unsigned long) dev);
return 0;
out_workqueue_free:
@@ -1333,6 +1515,7 @@ int i915_driver_unload(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
destroy_workqueue(dev_priv->wq);
+ del_timer_sync(&dev_priv->hangcheck_timer);
io_mapping_free(dev_priv->mm.gtt_mapping);
if (dev_priv->mm.gtt_mtrr >= 0) {
@@ -1472,6 +1655,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0),
DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, 0),
DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, 0),
+ DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, 0),
};
int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index dbe568c9327..b93814c0d3e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -89,6 +89,8 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state)
pci_set_power_state(dev->pdev, PCI_D3hot);
}
+ dev_priv->suspended = 1;
+
return 0;
}
@@ -97,8 +99,6 @@ static int i915_resume(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
int ret = 0;
- pci_set_power_state(dev->pdev, PCI_D0);
- pci_restore_state(dev->pdev);
if (pci_enable_device(dev->pdev))
return -1;
pci_set_master(dev->pdev);
@@ -124,9 +124,135 @@ static int i915_resume(struct drm_device *dev)
drm_helper_resume_force_mode(dev);
}
+ dev_priv->suspended = 0;
+
return ret;
}
+/**
+ * i965_reset - reset chip after a hang
+ * @dev: drm device to reset
+ * @flags: reset domains
+ *
+ * Reset the chip.  Useful if a hang is detected.  Returns zero on successful
+ * reset or otherwise an error code: -EIO if the reset bit does not clear
+ * within the timeout, -ENODEV if the chip is not one this routine knows
+ * how to reset.
+ *
+ * Procedure is fairly simple:
+ *   - reset the chip using the reset reg
+ *   - re-init context state
+ *   - re-init hardware status page
+ *   - re-init ring buffer
+ *   - re-init interrupt state
+ *   - re-init display
+ *
+ * dev->struct_mutex is taken on entry and is released again on every
+ * return path.
+ */
+int i965_reset(struct drm_device *dev, u8 flags)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	unsigned long timeout;
+	u8 gdrst;
+	/*
+	 * We really should only reset the display subsystem if we actually
+	 * need to
+	 */
+	bool need_display = true;
+
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * Clear request list
+	 */
+	i915_gem_retire_requests(dev);
+
+	if (need_display)
+		i915_save_display(dev);
+
+	if (IS_I965G(dev) || IS_G4X(dev)) {
+		/*
+		 * Write the requested domains (plus bit 0 when a full reset
+		 * is requested), wait briefly, write the original value back
+		 * with bit 0 cleared, then poll until bit 0 reads back clear.
+		 */
+		pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+		pci_write_config_byte(dev->pdev, GDRST,
+				      gdrst | flags |
+				      ((flags == GDRST_FULL) ? 0x1 : 0x0));
+		udelay(50);
+		pci_write_config_byte(dev->pdev, GDRST, gdrst & 0xfe);
+
+		/* ...we don't want to loop forever though, 500ms should be plenty */
+		timeout = jiffies + msecs_to_jiffies(500);
+		do {
+			udelay(100);
+			pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+		} while ((gdrst & 0x1) && time_after(timeout, jiffies));
+
+		if (gdrst & 0x1) {
+			WARN(true, "i915: Failed to reset chip\n");
+			mutex_unlock(&dev->struct_mutex);
+			return -EIO;
+		}
+	} else {
+		DRM_ERROR("Error occurred. Don't know how to reset this chip.\n");
+		/* Drop the lock taken above before bailing out. */
+		mutex_unlock(&dev->struct_mutex);
+		return -ENODEV;
+	}
+
+	/* Ok, now get things going again... */
+
+	/*
+	 * Everything depends on having the GTT running, so we need to start
+	 * there.  Fortunately we don't need to do this unless we reset the
+	 * chip at a PCI level.
+	 *
+	 * Next we need to restore the context, but we don't use those
+	 * yet either...
+	 *
+	 * Ring buffer needs to be re-initialized in the KMS case, or if X
+	 * was running at the time of the reset (i.e. we weren't VT
+	 * switched away).
+	 */
+	if (drm_core_check_feature(dev, DRIVER_MODESET) ||
+	    !dev_priv->mm.suspended) {
+		drm_i915_ring_buffer_t *ring = &dev_priv->ring;
+		struct drm_gem_object *obj = ring->ring_obj;
+		struct drm_i915_gem_object *obj_priv = obj->driver_private;
+		dev_priv->mm.suspended = 0;
+
+		/* Stop the ring if it's running. */
+		I915_WRITE(PRB0_CTL, 0);
+		I915_WRITE(PRB0_TAIL, 0);
+		I915_WRITE(PRB0_HEAD, 0);
+
+		/* Initialize the ring. */
+		I915_WRITE(PRB0_START, obj_priv->gtt_offset);
+		I915_WRITE(PRB0_CTL,
+			   ((obj->size - 4096) & RING_NR_PAGES) |
+			   RING_NO_REPORT |
+			   RING_VALID);
+		if (!drm_core_check_feature(dev, DRIVER_MODESET))
+			i915_kernel_lost_context(dev);
+		else {
+			/* Recompute the software view of the ring from the registers. */
+			ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+			ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
+			ring->space = ring->head - (ring->tail + 8);
+			if (ring->space < 0)
+				ring->space += ring->Size;
+		}
+
+		/* The IRQ re-install runs with struct_mutex dropped. */
+		mutex_unlock(&dev->struct_mutex);
+		drm_irq_uninstall(dev);
+		drm_irq_install(dev);
+		mutex_lock(&dev->struct_mutex);
+	}
+
+	/*
+	 * Display needs restore too...
+	 */
+	if (need_display)
+		i915_restore_display(dev);
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+
static int __devinit
i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
@@ -234,6 +360,8 @@ static int __init i915_init(void)
{
driver.num_ioctls = i915_max_ioctl;
+ i915_gem_shrinker_init();
+
/*
* If CONFIG_DRM_I915_KMS is set, default to KMS unless
* explicitly disabled with the module pararmeter.
@@ -260,6 +388,7 @@ static int __init i915_init(void)
static void __exit i915_exit(void)
{
+ i915_gem_shrinker_exit();
drm_exit(&driver);
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a0632f8e76a..b24b2d145b7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -48,6 +48,11 @@ enum pipe {
PIPE_B,
};
+enum plane { /* plane identifiers, laid out like enum pipe; presumably display planes A/B -- confirm */
+ PLANE_A = 0,
+ PLANE_B,
+};
+
#define I915_NUM_PIPE 2
/* Interface history:
@@ -148,6 +153,23 @@ struct drm_i915_error_state {
struct timeval time;
};
+struct drm_i915_display_funcs { /* table of display callbacks; embedded in drm_i915_private as 'display' */
+ void (*dpms)(struct drm_crtc *crtc, int mode);
+ bool (*fbc_enabled)(struct drm_crtc *crtc);
+ void (*enable_fbc)(struct drm_crtc *crtc, unsigned long interval);
+ void (*disable_fbc)(struct drm_device *dev);
+ int (*get_display_clock_speed)(struct drm_device *dev);
+ int (*get_fifo_size)(struct drm_device *dev, int plane);
+ void (*update_wm)(struct drm_device *dev, int planea_clock,
+ int planeb_clock, int sr_hdisplay, int pixel_size);
+ /* clock updates for mode set */
+ /* cursor updates */
+ /* render clock increase/decrease */
+ /* display clock increase/decrease */
+ /* pll clock increase/decrease */
+ /* clock gating init */
+};
+
typedef struct drm_i915_private {
struct drm_device *dev;
@@ -198,10 +220,21 @@ typedef struct drm_i915_private {
unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds;
int vblank_pipe;
+ /* For hangcheck timer */
+#define DRM_I915_HANGCHECK_PERIOD 75 /* in jiffies */
+ struct timer_list hangcheck_timer;
+ int hangcheck_count;
+ uint32_t last_acthd;
+
bool cursor_needs_physical;
struct drm_mm vram;
+ unsigned long cfb_size;
+ unsigned long cfb_pitch;
+ int cfb_fence;
+ int cfb_plane;
+
int irq_enabled;
struct intel_opregion opregion;
@@ -222,6 +255,8 @@ typedef struct drm_i915_private {
unsigned int edp_support:1;
int lvds_ssc_freq;
+ struct notifier_block lid_notifier;
+
int crt_ddc_bus; /* -1 = unknown, else GPIO to use for CRT DDC */
struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */
int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */
@@ -234,7 +269,11 @@ typedef struct drm_i915_private {
struct work_struct error_work;
struct workqueue_struct *wq;
+ /* Display functions */
+ struct drm_i915_display_funcs display;
+
/* Register state */
+ bool suspended;
u8 saveLBB;
u32 saveDSPACNTR;
u32 saveDSPBCNTR;
@@ -350,6 +389,15 @@ typedef struct drm_i915_private {
int gtt_mtrr;
/**
+ * Membership on list of all loaded devices, used to evict
+ * inactive buffers under memory pressure.
+ *
+ * Modifications should only be done whilst holding the
+ * shrink_list_lock spinlock.
+ */
+ struct list_head shrink_list;
+
+ /**
* List of objects currently involved in rendering from the
* ringbuffer.
*
@@ -432,7 +480,7 @@ typedef struct drm_i915_private {
* It prevents command submission from occuring and makes
* every pending request fail
*/
- int wedged;
+ atomic_t wedged;
/** Bit 6 swizzling required for X tiling */
uint32_t bit_6_swizzle_x;
@@ -491,10 +539,7 @@ struct drm_i915_gem_object {
* This is the same as gtt_space->start
*/
uint32_t gtt_offset;
- /**
- * Required alignment for the object
- */
- uint32_t gtt_alignment;
+
/**
* Fake offset for use by mmap(2)
*/
@@ -541,6 +586,11 @@ struct drm_i915_gem_object {
* in an execbuffer object list.
*/
int in_execbuffer;
+
+ /**
+ * Advice: are the backing pages purgeable?
+ */
+ int madv;
};
/**
@@ -585,6 +635,8 @@ extern int i915_max_ioctl;
extern unsigned int i915_fbpercrtc;
extern unsigned int i915_powersave;
+extern void i915_save_display(struct drm_device *dev);
+extern void i915_restore_display(struct drm_device *dev);
extern int i915_master_create(struct drm_device *dev, struct drm_master *master);
extern void i915_master_destroy(struct drm_device *dev, struct drm_master *master);
@@ -604,8 +656,10 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
extern int i915_emit_box(struct drm_device *dev,
struct drm_clip_rect *boxes,
int i, int DR1, int DR4);
+extern int i965_reset(struct drm_device *dev, u8 flags);
/* i915_irq.c */
+void i915_hangcheck_elapsed(unsigned long data);
extern int i915_irq_emit(struct drm_device *dev, void *data,
struct drm_file *file_priv);
extern int i915_irq_wait(struct drm_device *dev, void *data,
@@ -676,6 +730,8 @@ int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
@@ -695,6 +751,7 @@ int i915_gem_object_unbind(struct drm_gem_object *obj);
void i915_gem_release_mmap(struct drm_gem_object *obj);
void i915_gem_lastclose(struct drm_device *dev);
uint32_t i915_get_gem_seqno(struct drm_device *dev);
+bool i915_seqno_passed(uint32_t seq1, uint32_t seq2);
int i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
int i915_gem_object_put_fence_reg(struct drm_gem_object *obj);
void i915_gem_retire_requests(struct drm_device *dev);
@@ -720,6 +777,9 @@ int i915_gem_object_get_pages(struct drm_gem_object *obj);
void i915_gem_object_put_pages(struct drm_gem_object *obj);
void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv);
+void i915_gem_shrinker_init(void);
+void i915_gem_shrinker_exit(void);
+
/* i915_gem_tiling.c */
void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj);
@@ -767,6 +827,8 @@ static inline void opregion_enable_asle(struct drm_device *dev) { return; }
extern void intel_modeset_init(struct drm_device *dev);
extern void intel_modeset_cleanup(struct drm_device *dev);
extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state);
+extern void i8xx_disable_fbc(struct drm_device *dev);
+extern void g4x_disable_fbc(struct drm_device *dev);
/**
* Lock test for when it's just for synchronization of ring access.
@@ -864,6 +926,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
(dev)->pci_device == 0x2E12 || \
(dev)->pci_device == 0x2E22 || \
(dev)->pci_device == 0x2E32 || \
+ (dev)->pci_device == 0x2E42 || \
(dev)->pci_device == 0x0042 || \
(dev)->pci_device == 0x0046)
@@ -876,6 +939,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
(dev)->pci_device == 0x2E12 || \
(dev)->pci_device == 0x2E22 || \
(dev)->pci_device == 0x2E32 || \
+ (dev)->pci_device == 0x2E42 || \
IS_GM45(dev))
#define IS_IGDG(dev) ((dev)->pci_device == 0xa001)
@@ -909,12 +973,13 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
#define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev) || IS_IGDNG(dev))
#define SUPPORTS_INTEGRATED_DP(dev) (IS_G4X(dev) || IS_IGDNG(dev))
#define SUPPORTS_EDP(dev) (IS_IGDNG_M(dev))
-#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_I965G(dev))
+#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev) || IS_I965G(dev))
/* dsparb controlled by hw only */
#define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IGDNG(dev))
#define HAS_FW_BLC(dev) (IS_I9XX(dev) || IS_G4X(dev) || IS_IGDNG(dev))
#define HAS_PIPE_CXSR(dev) (IS_G4X(dev) || IS_IGDNG(dev))
+#define I915_HAS_FBC(dev) (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev)))
#define PRIMARY_RINGBUFFER_SIZE (128*1024)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c67317112f4..40727d4c291 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,6 +29,7 @@
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
+#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/swap.h>
#include <linux/pci.h>
@@ -48,11 +49,15 @@ static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
unsigned alignment);
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
-static int i915_gem_evict_something(struct drm_device *dev);
+static int i915_gem_evict_something(struct drm_device *dev, int min_size);
+static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
struct drm_i915_gem_pwrite *args,
struct drm_file *file_priv);
+static LIST_HEAD(shrink_list);
+static DEFINE_SPINLOCK(shrink_list_lock);
+
int i915_gem_do_init(struct drm_device *dev, unsigned long start,
unsigned long end)
{
@@ -316,6 +321,45 @@ fail_unlock:
return ret;
}
+/* Read the gfp mask currently set on the mapping backing @obj's file. */
+static inline gfp_t
+i915_gem_object_get_page_gfp_mask (struct drm_gem_object *obj)
+{
+	struct inode *inode = obj->filp->f_path.dentry->d_inode;
+
+	return mapping_gfp_mask(inode->i_mapping);
+}
+
+/* Replace the gfp mask on the mapping backing @obj's file with @gfp. */
+static inline void
+i915_gem_object_set_page_gfp_mask (struct drm_gem_object *obj, gfp_t gfp)
+{
+	struct inode *inode = obj->filp->f_path.dentry->d_inode;
+
+	mapping_set_gfp_mask(inode->i_mapping, gfp);
+}
+
+/*
+ * Get the backing pages for @obj, retrying once after eviction.
+ *
+ * If the first attempt fails with -ENOMEM, i915_gem_evict_something() is
+ * asked to free obj->size bytes, and the page lookup is repeated with
+ * __GFP_NORETRY temporarily removed from the mapping's gfp mask.  The
+ * original mask is put back afterwards whatever the outcome.
+ *
+ * Returns the result of the final attempt, or the eviction error.
+ */
+static int
+i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
+{
+	gfp_t saved_gfp;
+	int err;
+
+	err = i915_gem_object_get_pages(obj);
+	if (err != -ENOMEM)
+		return err;
+
+	/* Out of memory: throw out some old buffers before trying again. */
+	err = i915_gem_evict_something(obj->dev, obj->size);
+	if (err)
+		return err;
+
+	saved_gfp = i915_gem_object_get_page_gfp_mask(obj);
+	i915_gem_object_set_page_gfp_mask(obj, saved_gfp & ~__GFP_NORETRY);
+	err = i915_gem_object_get_pages(obj);
+	i915_gem_object_set_page_gfp_mask(obj, saved_gfp);
+
+	return err;
+}
+
/**
* This is the fallback shmem pread path, which allocates temporary storage
* in kernel space to copy_to_user into outside of the struct_mutex, so we
@@ -367,8 +411,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
mutex_lock(&dev->struct_mutex);
- ret = i915_gem_object_get_pages(obj);
- if (ret != 0)
+ ret = i915_gem_object_get_pages_or_evict(obj);
+ if (ret)
goto fail_unlock;
ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
@@ -842,8 +886,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
mutex_lock(&dev->struct_mutex);
- ret = i915_gem_object_get_pages(obj);
- if (ret != 0)
+ ret = i915_gem_object_get_pages_or_evict(obj);
+ if (ret)
goto fail_unlock;
ret = i915_gem_object_set_to_cpu_domain(obj, 1);
@@ -1155,28 +1199,22 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
/* Now bind it into the GTT if needed */
mutex_lock(&dev->struct_mutex);
if (!obj_priv->gtt_space) {
- ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
- if (ret) {
- mutex_unlock(&dev->struct_mutex);
- return VM_FAULT_SIGBUS;
- }
-
- ret = i915_gem_object_set_to_gtt_domain(obj, write);
- if (ret) {
- mutex_unlock(&dev->struct_mutex);
- return VM_FAULT_SIGBUS;
- }
+ ret = i915_gem_object_bind_to_gtt(obj, 0);
+ if (ret)
+ goto unlock;
list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, write);
+ if (ret)
+ goto unlock;
}
/* Need a new fence register? */
if (obj_priv->tiling_mode != I915_TILING_NONE) {
ret = i915_gem_object_get_fence_reg(obj);
- if (ret) {
- mutex_unlock(&dev->struct_mutex);
- return VM_FAULT_SIGBUS;
- }
+ if (ret)
+ goto unlock;
}
pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
@@ -1184,18 +1222,18 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
/* Finally, remap it using the new GTT offset */
ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
-
+unlock:
mutex_unlock(&dev->struct_mutex);
switch (ret) {
+ case 0:
+ case -ERESTARTSYS:
+ return VM_FAULT_NOPAGE;
case -ENOMEM:
case -EAGAIN:
return VM_FAULT_OOM;
- case -EFAULT:
- case -EINVAL:
- return VM_FAULT_SIGBUS;
default:
- return VM_FAULT_NOPAGE;
+ return VM_FAULT_SIGBUS;
}
}
@@ -1388,6 +1426,14 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
obj_priv = obj->driver_private;
+ if (obj_priv->madv != I915_MADV_WILLNEED) {
+ DRM_ERROR("Attempting to mmap a purgeable buffer\n");
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+ return -EINVAL;
+ }
+
+
if (!obj_priv->mmap_offset) {
ret = i915_gem_create_mmap_offset(obj);
if (ret) {
@@ -1399,22 +1445,12 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
args->offset = obj_priv->mmap_offset;
- obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj);
-
- /* Make sure the alignment is correct for fence regs etc */
- if (obj_priv->agp_mem &&
- (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) {
- drm_gem_object_unreference(obj);
- mutex_unlock(&dev->struct_mutex);
- return -EINVAL;
- }
-
/*
* Pull it into the GTT so that we have a page list (makes the
* initial fault faster and any subsequent flushing possible).
*/
if (!obj_priv->agp_mem) {
- ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+ ret = i915_gem_object_bind_to_gtt(obj, 0);
if (ret) {
drm_gem_object_unreference(obj);
mutex_unlock(&dev->struct_mutex);
@@ -1437,6 +1473,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
int i;
BUG_ON(obj_priv->pages_refcount == 0);
+ BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
if (--obj_priv->pages_refcount != 0)
return;
@@ -1444,13 +1481,21 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
if (obj_priv->tiling_mode != I915_TILING_NONE)
i915_gem_object_save_bit_17_swizzle(obj);
- for (i = 0; i < page_count; i++)
- if (obj_priv->pages[i] != NULL) {
- if (obj_priv->dirty)
- set_page_dirty(obj_priv->pages[i]);
+ if (obj_priv->madv == I915_MADV_DONTNEED)
+ obj_priv->dirty = 0;
+
+ for (i = 0; i < page_count; i++) {
+ if (obj_priv->pages[i] == NULL)
+ break;
+
+ if (obj_priv->dirty)
+ set_page_dirty(obj_priv->pages[i]);
+
+ if (obj_priv->madv == I915_MADV_WILLNEED)
mark_page_accessed(obj_priv->pages[i]);
- page_cache_release(obj_priv->pages[i]);
- }
+
+ page_cache_release(obj_priv->pages[i]);
+ }
obj_priv->dirty = 0;
drm_free_large(obj_priv->pages);
@@ -1489,6 +1534,26 @@ i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
obj_priv->last_rendering_seqno = 0;
}
+/* Immediately discard the backing storage */
+static void
+i915_gem_object_truncate(struct drm_gem_object *obj)
+{
+ struct drm_i915_gem_object *obj_priv = obj->driver_private;
+ struct inode *inode;
+
+ inode = obj->filp->f_path.dentry->d_inode;
+ if (inode->i_op->truncate)
+ inode->i_op->truncate (inode);
+
+ obj_priv->madv = __I915_MADV_PURGED;
+}
+
+static inline int
+i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
+{
+ return obj_priv->madv == I915_MADV_DONTNEED;
+}
+
static void
i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
{
@@ -1577,15 +1642,24 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
if ((obj->write_domain & flush_domains) ==
obj->write_domain) {
+ uint32_t old_write_domain = obj->write_domain;
+
obj->write_domain = 0;
i915_gem_object_move_to_active(obj, seqno);
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
}
}
- if (was_empty && !dev_priv->mm.suspended)
- queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+ if (!dev_priv->mm.suspended) {
+ mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
+ if (was_empty)
+ queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+ }
return seqno;
}
@@ -1623,6 +1697,8 @@ i915_gem_retire_request(struct drm_device *dev,
{
drm_i915_private_t *dev_priv = dev->dev_private;
+ trace_i915_gem_request_retire(dev, request->seqno);
+
/* Move any buffers on the active list that are no longer referenced
* by the ringbuffer to the flushing/inactive lists as appropriate.
*/
@@ -1671,7 +1747,7 @@ out:
/**
* Returns true if seq1 is later than seq2.
*/
-static int
+bool
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
return (int32_t)(seq1 - seq2) >= 0;
@@ -1709,7 +1785,7 @@ i915_gem_retire_requests(struct drm_device *dev)
retiring_seqno = request->seqno;
if (i915_seqno_passed(seqno, retiring_seqno) ||
- dev_priv->mm.wedged) {
+ atomic_read(&dev_priv->mm.wedged)) {
i915_gem_retire_request(dev, request);
list_del(&request->list);
@@ -1751,6 +1827,9 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
BUG_ON(seqno == 0);
+ if (atomic_read(&dev_priv->mm.wedged))
+ return -EIO;
+
if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
if (IS_IGDNG(dev))
ier = I915_READ(DEIER) | I915_READ(GTIER);
@@ -1763,16 +1842,20 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
i915_driver_irq_postinstall(dev);
}
+ trace_i915_gem_request_wait_begin(dev, seqno);
+
dev_priv->mm.waiting_gem_seqno = seqno;
i915_user_irq_get(dev);
ret = wait_event_interruptible(dev_priv->irq_queue,
i915_seqno_passed(i915_get_gem_seqno(dev),
seqno) ||
- dev_priv->mm.wedged);
+ atomic_read(&dev_priv->mm.wedged));
i915_user_irq_put(dev);
dev_priv->mm.waiting_gem_seqno = 0;
+
+ trace_i915_gem_request_wait_end(dev, seqno);
}
- if (dev_priv->mm.wedged)
+ if (atomic_read(&dev_priv->mm.wedged))
ret = -EIO;
if (ret && ret != -ERESTARTSYS)
@@ -1803,6 +1886,8 @@ i915_gem_flush(struct drm_device *dev,
DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
invalidate_domains, flush_domains);
#endif
+ trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno,
+ invalidate_domains, flush_domains);
if (flush_domains & I915_GEM_DOMAIN_CPU)
drm_agp_chipset_flush(dev);
@@ -1915,6 +2000,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
return -EINVAL;
}
+ /* blow away mappings if mapped through GTT */
+ i915_gem_release_mmap(obj);
+
+ if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
+ i915_gem_clear_fence_reg(obj);
+
/* Move the object to the CPU domain to ensure that
* any possible CPU writes while it's not in the GTT
* are flushed when we go to remap it. This will
@@ -1928,21 +2019,16 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
return ret;
}
+ BUG_ON(obj_priv->active);
+
if (obj_priv->agp_mem != NULL) {
drm_unbind_agp(obj_priv->agp_mem);
drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
obj_priv->agp_mem = NULL;
}
- BUG_ON(obj_priv->active);
-
- /* blow away mappings if mapped through GTT */
- i915_gem_release_mmap(obj);
-
- if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
- i915_gem_clear_fence_reg(obj);
-
i915_gem_object_put_pages(obj);
+ BUG_ON(obj_priv->pages_refcount);
if (obj_priv->gtt_space) {
atomic_dec(&dev->gtt_count);
@@ -1956,40 +2042,113 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
if (!list_empty(&obj_priv->list))
list_del_init(&obj_priv->list);
+ if (i915_gem_object_is_purgeable(obj_priv))
+ i915_gem_object_truncate(obj);
+
+ trace_i915_gem_object_unbind(obj);
+
return 0;
}
+static struct drm_gem_object *
+i915_gem_find_inactive_object(struct drm_device *dev, int min_size)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ struct drm_i915_gem_object *obj_priv;
+ struct drm_gem_object *best = NULL;
+ struct drm_gem_object *first = NULL;
+
+ /* Try to find the smallest clean object */
+ list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) {
+ struct drm_gem_object *obj = obj_priv->obj;
+ if (obj->size >= min_size) {
+ if ((!obj_priv->dirty ||
+ i915_gem_object_is_purgeable(obj_priv)) &&
+ (!best || obj->size < best->size)) {
+ best = obj;
+ if (best->size == min_size)
+ return best;
+ }
+ if (!first)
+ first = obj;
+ }
+ }
+
+ return best ? best : first;
+}
+
static int
-i915_gem_evict_something(struct drm_device *dev)
+i915_gem_evict_everything(struct drm_device *dev)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ uint32_t seqno;
+ int ret;
+ bool lists_empty;
+
+ spin_lock(&dev_priv->mm.active_list_lock);
+ lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
+ list_empty(&dev_priv->mm.flushing_list) &&
+ list_empty(&dev_priv->mm.active_list));
+ spin_unlock(&dev_priv->mm.active_list_lock);
+
+ if (lists_empty)
+ return -ENOSPC;
+
+ /* Flush everything (on to the inactive lists) and evict */
+ i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+ seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
+ if (seqno == 0)
+ return -ENOMEM;
+
+ ret = i915_wait_request(dev, seqno);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_evict_from_inactive_list(dev);
+ if (ret)
+ return ret;
+
+ spin_lock(&dev_priv->mm.active_list_lock);
+ lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
+ list_empty(&dev_priv->mm.flushing_list) &&
+ list_empty(&dev_priv->mm.active_list));
+ spin_unlock(&dev_priv->mm.active_list_lock);
+ BUG_ON(!lists_empty);
+
+ return 0;
+}
+
+static int
+i915_gem_evict_something(struct drm_device *dev, int min_size)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_gem_object *obj;
- struct drm_i915_gem_object *obj_priv;
- int ret = 0;
+ int ret;
for (;;) {
+ i915_gem_retire_requests(dev);
+
/* If there's an inactive buffer available now, grab it
* and be done.
*/
- if (!list_empty(&dev_priv->mm.inactive_list)) {
- obj_priv = list_first_entry(&dev_priv->mm.inactive_list,
- struct drm_i915_gem_object,
- list);
- obj = obj_priv->obj;
- BUG_ON(obj_priv->pin_count != 0);
+ obj = i915_gem_find_inactive_object(dev, min_size);
+ if (obj) {
+ struct drm_i915_gem_object *obj_priv;
+
#if WATCH_LRU
DRM_INFO("%s: evicting %p\n", __func__, obj);
#endif
+ obj_priv = obj->driver_private;
+ BUG_ON(obj_priv->pin_count != 0);
BUG_ON(obj_priv->active);
/* Wait on the rendering and unbind the buffer. */
- ret = i915_gem_object_unbind(obj);
- break;
+ return i915_gem_object_unbind(obj);
}
/* If we didn't get anything, but the ring is still processing
- * things, wait for one of those things to finish and hopefully
- * leave us a buffer to evict.
+ * things, wait for the next to finish and hopefully leave us
+ * a buffer to evict.
*/
if (!list_empty(&dev_priv->mm.request_list)) {
struct drm_i915_gem_request *request;
@@ -2000,16 +2159,9 @@ i915_gem_evict_something(struct drm_device *dev)
ret = i915_wait_request(dev, request->seqno);
if (ret)
- break;
+ return ret;
- /* if waiting caused an object to become inactive,
- * then loop around and wait for it. Otherwise, we
- * assume that waiting freed and unbound something,
- * so there should now be some space in the GTT
- */
- if (!list_empty(&dev_priv->mm.inactive_list))
- continue;
- break;
+ continue;
}
/* If we didn't have anything on the request list but there
@@ -2018,46 +2170,44 @@ i915_gem_evict_something(struct drm_device *dev)
* will get moved to inactive.
*/
if (!list_empty(&dev_priv->mm.flushing_list)) {
- obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
- struct drm_i915_gem_object,
- list);
- obj = obj_priv->obj;
+ struct drm_i915_gem_object *obj_priv;
- i915_gem_flush(dev,
- obj->write_domain,
- obj->write_domain);
- i915_add_request(dev, NULL, obj->write_domain);
+ /* Find an object that we can immediately reuse */
+ list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) {
+ obj = obj_priv->obj;
+ if (obj->size >= min_size)
+ break;
- obj = NULL;
- continue;
- }
+ obj = NULL;
+ }
- DRM_ERROR("inactive empty %d request empty %d "
- "flushing empty %d\n",
- list_empty(&dev_priv->mm.inactive_list),
- list_empty(&dev_priv->mm.request_list),
- list_empty(&dev_priv->mm.flushing_list));
- /* If we didn't do any of the above, there's nothing to be done
- * and we just can't fit it in.
- */
- return -ENOSPC;
- }
- return ret;
-}
+ if (obj != NULL) {
+ uint32_t seqno;
-static int
-i915_gem_evict_everything(struct drm_device *dev)
-{
- int ret;
+ i915_gem_flush(dev,
+ obj->write_domain,
+ obj->write_domain);
+ seqno = i915_add_request(dev, NULL, obj->write_domain);
+ if (seqno == 0)
+ return -ENOMEM;
- for (;;) {
- ret = i915_gem_evict_something(dev);
- if (ret != 0)
- break;
+ ret = i915_wait_request(dev, seqno);
+ if (ret)
+ return ret;
+
+ continue;
+ }
+ }
+
+ /* If we didn't do any of the above, there's no single buffer
+ * large enough to swap out for the new one, so just evict
+ * everything and start again. (This should be rare.)
+ */
+ if (!list_empty (&dev_priv->mm.inactive_list))
+ return i915_gem_evict_from_inactive_list(dev);
+ else
+ return i915_gem_evict_everything(dev);
}
- if (ret == -ENOSPC)
- return 0;
- return ret;
}
int
@@ -2080,7 +2230,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
BUG_ON(obj_priv->pages != NULL);
obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
if (obj_priv->pages == NULL) {
- DRM_ERROR("Faled to allocate page list\n");
obj_priv->pages_refcount--;
return -ENOMEM;
}
@@ -2091,7 +2240,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
page = read_mapping_page(mapping, i, NULL);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
- DRM_ERROR("read_mapping_page failed: %d\n", ret);
i915_gem_object_put_pages(obj);
return ret;
}
@@ -2328,6 +2476,8 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
else
i830_write_fence_reg(reg);
+ trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode);
+
return 0;
}
@@ -2410,10 +2560,17 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj_priv = obj->driver_private;
struct drm_mm_node *free_space;
- int page_count, ret;
+ bool retry_alloc = false;
+ int ret;
if (dev_priv->mm.suspended)
return -EBUSY;
+
+ if (obj_priv->madv != I915_MADV_WILLNEED) {
+ DRM_ERROR("Attempting to bind a purgeable object\n");
+ return -EINVAL;
+ }
+
if (alignment == 0)
alignment = i915_gem_get_gtt_alignment(obj);
if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
@@ -2433,30 +2590,16 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
}
}
if (obj_priv->gtt_space == NULL) {
- bool lists_empty;
-
/* If the gtt is empty and we're still having trouble
* fitting our object in, we're out of memory.
*/
#if WATCH_LRU
DRM_INFO("%s: GTT full, evicting something\n", __func__);
#endif
- spin_lock(&dev_priv->mm.active_list_lock);
- lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
- list_empty(&dev_priv->mm.flushing_list) &&
- list_empty(&dev_priv->mm.active_list));
- spin_unlock(&dev_priv->mm.active_list_lock);
- if (lists_empty) {
- DRM_ERROR("GTT full, but LRU list empty\n");
- return -ENOSPC;
- }
-
- ret = i915_gem_evict_something(dev);
- if (ret != 0) {
- if (ret != -ERESTARTSYS)
- DRM_ERROR("Failed to evict a buffer %d\n", ret);
+ ret = i915_gem_evict_something(dev, obj->size);
+ if (ret)
return ret;
- }
+
goto search_free;
}
@@ -2464,27 +2607,56 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
DRM_INFO("Binding object of size %zd at 0x%08x\n",
obj->size, obj_priv->gtt_offset);
#endif
+ if (retry_alloc) {
+ i915_gem_object_set_page_gfp_mask (obj,
+ i915_gem_object_get_page_gfp_mask (obj) & ~__GFP_NORETRY);
+ }
ret = i915_gem_object_get_pages(obj);
+ if (retry_alloc) {
+ i915_gem_object_set_page_gfp_mask (obj,
+ i915_gem_object_get_page_gfp_mask (obj) | __GFP_NORETRY);
+ }
if (ret) {
drm_mm_put_block(obj_priv->gtt_space);
obj_priv->gtt_space = NULL;
+
+ if (ret == -ENOMEM) {
+ /* first try to clear up some space from the GTT */
+ ret = i915_gem_evict_something(dev, obj->size);
+ if (ret) {
+ /* now try to shrink everyone else */
+ if (! retry_alloc) {
+ retry_alloc = true;
+ goto search_free;
+ }
+
+ return ret;
+ }
+
+ goto search_free;
+ }
+
return ret;
}
- page_count = obj->size / PAGE_SIZE;
/* Create an AGP memory structure pointing at our pages, and bind it
* into the GTT.
*/
obj_priv->agp_mem = drm_agp_bind_pages(dev,
obj_priv->pages,
- page_count,
+ obj->size >> PAGE_SHIFT,
obj_priv->gtt_offset,
obj_priv->agp_type);
if (obj_priv->agp_mem == NULL) {
i915_gem_object_put_pages(obj);
drm_mm_put_block(obj_priv->gtt_space);
obj_priv->gtt_space = NULL;
- return -ENOMEM;
+
+ ret = i915_gem_evict_something(dev, obj->size);
+ if (ret)
+ return ret;
+
+ goto search_free;
}
atomic_inc(&dev->gtt_count);
atomic_add(obj->size, &dev->gtt_memory);
@@ -2496,6 +2668,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+ trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
+
return 0;
}
@@ -2511,15 +2685,7 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
if (obj_priv->pages == NULL)
return;
- /* XXX: The 865 in particular appears to be weird in how it handles
- * cache flushing. We haven't figured it out, but the
- * clflush+agp_chipset_flush doesn't appear to successfully get the
- * data visible to the PGU, while wbinvd + agp_chipset_flush does.
- */
- if (IS_I865G(obj->dev)) {
- wbinvd();
- return;
- }
+ trace_i915_gem_object_clflush(obj);
drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
}
@@ -2530,21 +2696,29 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
uint32_t seqno;
+ uint32_t old_write_domain;
if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
return;
/* Queue the GPU write cache flushing we need. */
+ old_write_domain = obj->write_domain;
i915_gem_flush(dev, 0, obj->write_domain);
seqno = i915_add_request(dev, NULL, obj->write_domain);
obj->write_domain = 0;
i915_gem_object_move_to_active(obj, seqno);
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
/** Flushes the GTT write domain for the object if it's dirty. */
static void
i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
{
+ uint32_t old_write_domain;
+
if (obj->write_domain != I915_GEM_DOMAIN_GTT)
return;
@@ -2552,7 +2726,12 @@ i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
* to it immediately go to main memory as far as we know, so there's
* no chipset flush. It also doesn't land in render cache.
*/
+ old_write_domain = obj->write_domain;
obj->write_domain = 0;
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
/** Flushes the CPU write domain for the object if it's dirty. */
@@ -2560,13 +2739,19 @@ static void
i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
+ uint32_t old_write_domain;
if (obj->write_domain != I915_GEM_DOMAIN_CPU)
return;
i915_gem_clflush_object(obj);
drm_agp_chipset_flush(dev);
+ old_write_domain = obj->write_domain;
obj->write_domain = 0;
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
/**
@@ -2579,6 +2764,7 @@ int
i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
{
struct drm_i915_gem_object *obj_priv = obj->driver_private;
+ uint32_t old_write_domain, old_read_domains;
int ret;
/* Not valid to be called on unbound objects. */
@@ -2591,6 +2777,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
if (ret != 0)
return ret;
+ old_write_domain = obj->write_domain;
+ old_read_domains = obj->read_domains;
+
/* If we're writing through the GTT domain, then CPU and GPU caches
* will need to be invalidated at next use.
*/
@@ -2609,6 +2798,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
obj_priv->dirty = 1;
}
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ old_write_domain);
+
return 0;
}
@@ -2621,6 +2814,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
static int
i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
{
+ uint32_t old_write_domain, old_read_domains;
int ret;
i915_gem_object_flush_gpu_write_domain(obj);
@@ -2636,6 +2830,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
*/
i915_gem_object_set_to_full_cpu_read_domain(obj);
+ old_write_domain = obj->write_domain;
+ old_read_domains = obj->read_domains;
+
/* Flush the CPU cache if it's still invalid. */
if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
i915_gem_clflush_object(obj);
@@ -2656,6 +2853,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
obj->write_domain = I915_GEM_DOMAIN_CPU;
}
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ old_write_domain);
+
return 0;
}
@@ -2777,6 +2978,7 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
struct drm_i915_gem_object *obj_priv = obj->driver_private;
uint32_t invalidate_domains = 0;
uint32_t flush_domains = 0;
+ uint32_t old_read_domains;
BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
@@ -2823,6 +3025,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
i915_gem_clflush_object(obj);
}
+ old_read_domains = obj->read_domains;
+
/* The actual obj->write_domain will be updated with
* pending_write_domain after we emit the accumulated flush for all
* of our domain changes in execbuffers (which clears objects'
@@ -2841,6 +3045,10 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
obj->read_domains, obj->write_domain,
dev->invalidate_domains, dev->flush_domains);
#endif
+
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ obj->write_domain);
}
/**
@@ -2893,6 +3101,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
uint64_t offset, uint64_t size)
{
struct drm_i915_gem_object *obj_priv = obj->driver_private;
+ uint32_t old_read_domains;
int i, ret;
if (offset == 0 && size == obj->size)
@@ -2939,8 +3148,13 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
*/
BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+ old_read_domains = obj->read_domains;
obj->read_domains |= I915_GEM_DOMAIN_CPU;
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ obj->write_domain);
+
return 0;
}
@@ -2984,6 +3198,21 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
}
target_obj_priv = target_obj->driver_private;
+#if WATCH_RELOC
+ DRM_INFO("%s: obj %p offset %08x target %d "
+ "read %08x write %08x gtt %08x "
+ "presumed %08x delta %08x\n",
+ __func__,
+ obj,
+ (int) reloc->offset,
+ (int) reloc->target_handle,
+ (int) reloc->read_domains,
+ (int) reloc->write_domain,
+ (int) target_obj_priv->gtt_offset,
+ (int) reloc->presumed_offset,
+ reloc->delta);
+#endif
+
/* The target buffer should have appeared before us in the
* exec_object list, so it should have a GTT space bound by now.
*/
@@ -2995,25 +3224,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
return -EINVAL;
}
- if (reloc->offset > obj->size - 4) {
- DRM_ERROR("Relocation beyond object bounds: "
- "obj %p target %d offset %d size %d.\n",
- obj, reloc->target_handle,
- (int) reloc->offset, (int) obj->size);
- drm_gem_object_unreference(target_obj);
- i915_gem_object_unpin(obj);
- return -EINVAL;
- }
- if (reloc->offset & 3) {
- DRM_ERROR("Relocation not 4-byte aligned: "
- "obj %p target %d offset %d.\n",
- obj, reloc->target_handle,
- (int) reloc->offset);
- drm_gem_object_unreference(target_obj);
- i915_gem_object_unpin(obj);
- return -EINVAL;
- }
-
+ /* Validate that the target is in a valid r/w GPU domain */
if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
reloc->read_domains & I915_GEM_DOMAIN_CPU) {
DRM_ERROR("reloc with read/write CPU domains: "
@@ -3027,7 +3238,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
i915_gem_object_unpin(obj);
return -EINVAL;
}
-
if (reloc->write_domain && target_obj->pending_write_domain &&
reloc->write_domain != target_obj->pending_write_domain) {
DRM_ERROR("Write domain conflict: "
@@ -3042,21 +3252,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
return -EINVAL;
}
-#if WATCH_RELOC
- DRM_INFO("%s: obj %p offset %08x target %d "
- "read %08x write %08x gtt %08x "
- "presumed %08x delta %08x\n",
- __func__,
- obj,
- (int) reloc->offset,
- (int) reloc->target_handle,
- (int) reloc->read_domains,
- (int) reloc->write_domain,
- (int) target_obj_priv->gtt_offset,
- (int) reloc->presumed_offset,
- reloc->delta);
-#endif
-
target_obj->pending_read_domains |= reloc->read_domains;
target_obj->pending_write_domain |= reloc->write_domain;
@@ -3068,6 +3263,37 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
continue;
}
+ /* Check that the relocation address is valid... */
+ if (reloc->offset > obj->size - 4) {
+ DRM_ERROR("Relocation beyond object bounds: "
+ "obj %p target %d offset %d size %d.\n",
+ obj, reloc->target_handle,
+ (int) reloc->offset, (int) obj->size);
+ drm_gem_object_unreference(target_obj);
+ i915_gem_object_unpin(obj);
+ return -EINVAL;
+ }
+ if (reloc->offset & 3) {
+ DRM_ERROR("Relocation not 4-byte aligned: "
+ "obj %p target %d offset %d.\n",
+ obj, reloc->target_handle,
+ (int) reloc->offset);
+ drm_gem_object_unreference(target_obj);
+ i915_gem_object_unpin(obj);
+ return -EINVAL;
+ }
+
+ /* and points to somewhere within the target object. */
+ if (reloc->delta >= target_obj->size) {
+ DRM_ERROR("Relocation beyond target object bounds: "
+ "obj %p target %d delta %d size %d.\n",
+ obj, reloc->target_handle,
+ (int) reloc->delta, (int) target_obj->size);
+ drm_gem_object_unreference(target_obj);
+ i915_gem_object_unpin(obj);
+ return -EINVAL;
+ }
+
ret = i915_gem_object_set_to_gtt_domain(obj, 1);
if (ret != 0) {
drm_gem_object_unreference(target_obj);
@@ -3126,6 +3352,8 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
exec_len = (uint32_t) exec->batch_len;
+ trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno);
+
count = nbox ? nbox : 1;
for (i = 0; i < count; i++) {
@@ -3363,7 +3591,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
i915_verify_inactive(dev, __FILE__, __LINE__);
- if (dev_priv->mm.wedged) {
+ if (atomic_read(&dev_priv->mm.wedged)) {
DRM_ERROR("Execbuf while wedged\n");
mutex_unlock(&dev->struct_mutex);
ret = -EIO;
@@ -3421,8 +3649,23 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
/* error other than GTT full, or we've already tried again */
if (ret != -ENOSPC || pin_tries >= 1) {
- if (ret != -ERESTARTSYS)
- DRM_ERROR("Failed to pin buffers %d\n", ret);
+ if (ret != -ERESTARTSYS) {
+ unsigned long long total_size = 0;
+ for (i = 0; i < args->buffer_count; i++)
+ total_size += object_list[i]->size;
+ DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes: %d\n",
+ pinned+1, args->buffer_count,
+ total_size, ret);
+ DRM_ERROR("%d objects [%d pinned], "
+ "%d object bytes [%d pinned], "
+ "%d/%d gtt bytes\n",
+ atomic_read(&dev->object_count),
+ atomic_read(&dev->pin_count),
+ atomic_read(&dev->object_memory),
+ atomic_read(&dev->pin_memory),
+ atomic_read(&dev->gtt_memory),
+ dev->gtt_total);
+ }
goto err;
}
@@ -3433,7 +3676,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
/* evict everyone we can from the aperture */
ret = i915_gem_evict_everything(dev);
- if (ret)
+ if (ret && ret != -ENOSPC)
goto err;
}
@@ -3489,8 +3732,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
for (i = 0; i < args->buffer_count; i++) {
struct drm_gem_object *obj = object_list[i];
+ uint32_t old_write_domain = obj->write_domain;
obj->write_domain = obj->pending_write_domain;
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
i915_verify_inactive(dev, __FILE__, __LINE__);
@@ -3607,11 +3854,8 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
i915_verify_inactive(dev, __FILE__, __LINE__);
if (obj_priv->gtt_space == NULL) {
ret = i915_gem_object_bind_to_gtt(obj, alignment);
- if (ret != 0) {
- if (ret != -EBUSY && ret != -ERESTARTSYS)
- DRM_ERROR("Failure to bind: %d\n", ret);
+ if (ret)
return ret;
- }
}
/*
* Pre-965 chips need a fence register set up in order to
@@ -3691,6 +3935,13 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
}
obj_priv = obj->driver_private;
+ if (obj_priv->madv != I915_MADV_WILLNEED) {
+ DRM_ERROR("Attempting to pin a purgeable buffer\n");
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+ return -EINVAL;
+ }
+
if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
args->handle);
@@ -3803,6 +4054,56 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
return i915_gem_ring_throttle(dev, file_priv);
}
+int
+i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_i915_gem_madvise *args = data;
+ struct drm_gem_object *obj;
+ struct drm_i915_gem_object *obj_priv;
+
+ switch (args->madv) {
+ case I915_MADV_DONTNEED:
+ case I915_MADV_WILLNEED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+ if (obj == NULL) {
+ DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
+ args->handle);
+ return -EBADF;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ obj_priv = obj->driver_private;
+
+ if (obj_priv->pin_count) {
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
+ return -EINVAL;
+ }
+
+ if (obj_priv->madv != __I915_MADV_PURGED)
+ obj_priv->madv = args->madv;
+
+ /* if the object is no longer bound, discard its backing storage */
+ if (i915_gem_object_is_purgeable(obj_priv) &&
+ obj_priv->gtt_space == NULL)
+ i915_gem_object_truncate(obj);
+
+ args->retained = obj_priv->madv != __I915_MADV_PURGED;
+
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ return 0;
+}
+
int i915_gem_init_object(struct drm_gem_object *obj)
{
struct drm_i915_gem_object *obj_priv;
@@ -3827,6 +4128,9 @@ int i915_gem_init_object(struct drm_gem_object *obj)
obj_priv->fence_reg = I915_FENCE_REG_NONE;
INIT_LIST_HEAD(&obj_priv->list);
INIT_LIST_HEAD(&obj_priv->fence_list);
+ obj_priv->madv = I915_MADV_WILLNEED;
+
+ trace_i915_gem_object_create(obj);
return 0;
}
@@ -3836,6 +4140,8 @@ void i915_gem_free_object(struct drm_gem_object *obj)
struct drm_device *dev = obj->dev;
struct drm_i915_gem_object *obj_priv = obj->driver_private;
+ trace_i915_gem_object_destroy(obj);
+
while (obj_priv->pin_count > 0)
i915_gem_object_unpin(obj);
@@ -3844,43 +4150,35 @@ void i915_gem_free_object(struct drm_gem_object *obj)
i915_gem_object_unbind(obj);
- i915_gem_free_mmap_offset(obj);
+ if (obj_priv->mmap_offset)
+ i915_gem_free_mmap_offset(obj);
kfree(obj_priv->page_cpu_valid);
kfree(obj_priv->bit_17);
kfree(obj->driver_private);
}
-/** Unbinds all objects that are on the given buffer list. */
+/** Unbinds all inactive objects. */
static int
-i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
+i915_gem_evict_from_inactive_list(struct drm_device *dev)
{
- struct drm_gem_object *obj;
- struct drm_i915_gem_object *obj_priv;
- int ret;
+ drm_i915_private_t *dev_priv = dev->dev_private;
- while (!list_empty(head)) {
- obj_priv = list_first_entry(head,
- struct drm_i915_gem_object,
- list);
- obj = obj_priv->obj;
+ while (!list_empty(&dev_priv->mm.inactive_list)) {
+ struct drm_gem_object *obj;
+ int ret;
- if (obj_priv->pin_count != 0) {
- DRM_ERROR("Pinned object in unbind list\n");
- mutex_unlock(&dev->struct_mutex);
- return -EINVAL;
- }
+ obj = list_first_entry(&dev_priv->mm.inactive_list,
+ struct drm_i915_gem_object,
+ list)->obj;
ret = i915_gem_object_unbind(obj);
if (ret != 0) {
- DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
- ret);
- mutex_unlock(&dev->struct_mutex);
+ DRM_ERROR("Error unbinding object: %d\n", ret);
return ret;
}
}
-
return 0;
}
@@ -3902,6 +4200,7 @@ i915_gem_idle(struct drm_device *dev)
* We need to replace this with a semaphore, or something.
*/
dev_priv->mm.suspended = 1;
+ del_timer(&dev_priv->hangcheck_timer);
/* Cancel the retire work handler, wait for it to finish if running
*/
@@ -3931,7 +4230,7 @@ i915_gem_idle(struct drm_device *dev)
if (last_seqno == cur_seqno) {
if (stuck++ > 100) {
DRM_ERROR("hardware wedged\n");
- dev_priv->mm.wedged = 1;
+ atomic_set(&dev_priv->mm.wedged, 1);
DRM_WAKEUP(&dev_priv->irq_queue);
break;
}
@@ -3944,7 +4243,7 @@ i915_gem_idle(struct drm_device *dev)
i915_gem_retire_requests(dev);
spin_lock(&dev_priv->mm.active_list_lock);
- if (!dev_priv->mm.wedged) {
+ if (!atomic_read(&dev_priv->mm.wedged)) {
/* Active and flushing should now be empty as we've
* waited for a sequence higher than any pending execbuffer
*/
@@ -3962,29 +4261,41 @@ i915_gem_idle(struct drm_device *dev)
* the GPU domains and just stuff them onto inactive.
*/
while (!list_empty(&dev_priv->mm.active_list)) {
- struct drm_i915_gem_object *obj_priv;
+ struct drm_gem_object *obj;
+ uint32_t old_write_domain;
- obj_priv = list_first_entry(&dev_priv->mm.active_list,
- struct drm_i915_gem_object,
- list);
- obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
- i915_gem_object_move_to_inactive(obj_priv->obj);
+ obj = list_first_entry(&dev_priv->mm.active_list,
+ struct drm_i915_gem_object,
+ list)->obj;
+ old_write_domain = obj->write_domain;
+ obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+ i915_gem_object_move_to_inactive(obj);
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
spin_unlock(&dev_priv->mm.active_list_lock);
while (!list_empty(&dev_priv->mm.flushing_list)) {
- struct drm_i915_gem_object *obj_priv;
+ struct drm_gem_object *obj;
+ uint32_t old_write_domain;
- obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
- struct drm_i915_gem_object,
- list);
- obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
- i915_gem_object_move_to_inactive(obj_priv->obj);
+ obj = list_first_entry(&dev_priv->mm.flushing_list,
+ struct drm_i915_gem_object,
+ list)->obj;
+ old_write_domain = obj->write_domain;
+ obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+ i915_gem_object_move_to_inactive(obj);
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
/* Move all inactive buffers out of the GTT. */
- ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
+ ret = i915_gem_evict_from_inactive_list(dev);
WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
if (ret) {
mutex_unlock(&dev->struct_mutex);
@@ -4206,9 +4517,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
if (drm_core_check_feature(dev, DRIVER_MODESET))
return 0;
- if (dev_priv->mm.wedged) {
+ if (atomic_read(&dev_priv->mm.wedged)) {
DRM_ERROR("Reenabling wedged hardware, good luck\n");
- dev_priv->mm.wedged = 0;
+ atomic_set(&dev_priv->mm.wedged, 0);
}
mutex_lock(&dev->struct_mutex);
@@ -4274,6 +4585,10 @@ i915_gem_load(struct drm_device *dev)
i915_gem_retire_work_handler);
dev_priv->mm.next_gem_seqno = 1;
+ spin_lock(&shrink_list_lock);
+ list_add(&dev_priv->mm.shrink_list, &shrink_list);
+ spin_unlock(&shrink_list_lock);
+
/* Old X drivers will take 0-2 for front, back, depth buffers */
dev_priv->fence_reg_start = 3;
@@ -4491,3 +4806,116 @@ void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
list_del_init(i915_file_priv->mm.request_list.next);
mutex_unlock(&dev->struct_mutex);
}
+
+/*
+ * Shrinker callback: walks every device on shrink_list and unbinds objects
+ * from its inactive list.
+ *
+ * With nr_to_scan == 0 nothing is unbound; the inactive objects are only
+ * counted. Otherwise up to nr_to_scan objects are unbound, purgeable ones
+ * first. Devices whose struct_mutex cannot be taken with mutex_trylock()
+ * are skipped. gfp_mask is not used.
+ *
+ * Returns -1 when no device mutex could be taken in either pass, otherwise
+ * the remaining inactive count scaled by sysctl_vfs_cache_pressure, or 0.
+ */
+static int
+i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
+{
+ drm_i915_private_t *dev_priv, *next_dev;
+ struct drm_i915_gem_object *obj_priv, *next_obj;
+ int cnt = 0;
+ int would_deadlock = 1;
+
+ /* "fast-path" to count number of available objects */
+ if (nr_to_scan == 0) {
+ spin_lock(&shrink_list_lock);
+ list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
+ struct drm_device *dev = dev_priv->dev;
+
+ if (mutex_trylock(&dev->struct_mutex)) {
+ list_for_each_entry(obj_priv,
+ &dev_priv->mm.inactive_list,
+ list)
+ cnt++;
+ mutex_unlock(&dev->struct_mutex);
+ }
+ }
+ spin_unlock(&shrink_list_lock);
+
+ /* integer division first: fewer than 100 objects reports 0 */
+ return (cnt / 100) * sysctl_vfs_cache_pressure;
+ }
+
+ spin_lock(&shrink_list_lock);
+
+ /* first scan for clean buffers */
+ list_for_each_entry_safe(dev_priv, next_dev,
+ &shrink_list, mm.shrink_list) {
+ struct drm_device *dev = dev_priv->dev;
+
+ if (! mutex_trylock(&dev->struct_mutex))
+ continue;
+
+ /* the list lock is dropped while this device's objects are walked */
+ spin_unlock(&shrink_list_lock);
+
+ i915_gem_retire_requests(dev);
+
+ list_for_each_entry_safe(obj_priv, next_obj,
+ &dev_priv->mm.inactive_list,
+ list) {
+ if (i915_gem_object_is_purgeable(obj_priv)) {
+ i915_gem_object_unbind(obj_priv->obj);
+ if (--nr_to_scan <= 0)
+ break;
+ }
+ }
+
+ /* retake the list lock before releasing the device mutex */
+ spin_lock(&shrink_list_lock);
+ mutex_unlock(&dev->struct_mutex);
+
+ would_deadlock = 0;
+
+ if (nr_to_scan <= 0)
+ break;
+ }
+
+ /* second pass, evict/count anything still on the inactive list */
+ list_for_each_entry_safe(dev_priv, next_dev,
+ &shrink_list, mm.shrink_list) {
+ struct drm_device *dev = dev_priv->dev;
+
+ if (! mutex_trylock(&dev->struct_mutex))
+ continue;
+
+ spin_unlock(&shrink_list_lock);
+
+ list_for_each_entry_safe(obj_priv, next_obj,
+ &dev_priv->mm.inactive_list,
+ list) {
+ if (nr_to_scan > 0) {
+ i915_gem_object_unbind(obj_priv->obj);
+ nr_to_scan--;
+ } else
+ cnt++;
+ }
+
+ spin_lock(&shrink_list_lock);
+ mutex_unlock(&dev->struct_mutex);
+
+ would_deadlock = 0;
+ }
+
+ spin_unlock(&shrink_list_lock);
+
+ /* would_deadlock is still set only if every trylock above failed */
+ if (would_deadlock)
+ return -1;
+ else if (cnt > 0)
+ return (cnt / 100) * sysctl_vfs_cache_pressure;
+ else
+ return 0;
+}
+
+/* Shrinker descriptor that routes shrink requests to i915_gem_shrink(). */
+static struct shrinker shrinker = {
+ .shrink = i915_gem_shrink,
+ .seeks = DEFAULT_SEEKS,
+};
+
+/* Registers the GEM shrinker; the return value of register_shrinker() is not checked. */
+__init void
+i915_gem_shrinker_init(void)
+{
+ register_shrinker(&shrinker);
+}
+
+/* Unregisters the GEM shrinker registered by i915_gem_shrinker_init(). */
+__exit void
+i915_gem_shrinker_exit(void)
+{
+ unregister_shrinker(&shrinker);
+}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6c89f2ff249..4dfeec7cdd4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -31,6 +31,7 @@
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
+#include "i915_trace.h"
#include "intel_drv.h"
#define MAX_NOPID ((u32)~0)
@@ -279,7 +280,9 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev)
}
if (gt_iir & GT_USER_INTERRUPT) {
- dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
+ u32 seqno = i915_get_gem_seqno(dev);
+ dev_priv->mm.irq_gem_seqno = seqno;
+ trace_i915_gem_request_complete(dev, seqno);
DRM_WAKEUP(&dev_priv->irq_queue);
}
@@ -302,12 +305,25 @@ static void i915_error_work_func(struct work_struct *work)
drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
error_work);
struct drm_device *dev = dev_priv->dev;
- char *event_string = "ERROR=1";
- char *envp[] = { event_string, NULL };
+ char *error_event[] = { "ERROR=1", NULL };
+ char *reset_event[] = { "RESET=1", NULL };
+ char *reset_done_event[] = { "ERROR=0", NULL };
DRM_DEBUG("generating error event\n");
-
- kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp);
+ kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
+
+ if (atomic_read(&dev_priv->mm.wedged)) {
+ if (IS_I965G(dev)) {
+ DRM_DEBUG("resetting chip\n");
+ kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event);
+ if (!i965_reset(dev, GDRST_RENDER)) {
+ atomic_set(&dev_priv->mm.wedged, 0);
+ kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event);
+ }
+ } else {
+ printk("reboot required\n");
+ }
+ }
}
/**
@@ -372,7 +388,7 @@ out:
* so userspace knows something bad happened (should trigger collection
* of a ring dump etc.).
*/
-static void i915_handle_error(struct drm_device *dev)
+static void i915_handle_error(struct drm_device *dev, bool wedged)
{
struct drm_i915_private *dev_priv = dev->dev_private;
u32 eir = I915_READ(EIR);
@@ -482,6 +498,16 @@ static void i915_handle_error(struct drm_device *dev)
I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
}
+ if (wedged) {
+ atomic_set(&dev_priv->mm.wedged, 1);
+
+ /*
+ * Wakeup waiting processes so they don't hang
+ */
+ printk("i915: Waking up sleeping processes\n");
+ DRM_WAKEUP(&dev_priv->irq_queue);
+ }
+
queue_work(dev_priv->wq, &dev_priv->error_work);
}
@@ -527,7 +553,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
pipeb_stats = I915_READ(PIPEBSTAT);
if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
- i915_handle_error(dev);
+ i915_handle_error(dev, false);
/*
* Clear the PIPE(A|B)STAT regs before the IIR
@@ -599,8 +625,12 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
}
if (iir & I915_USER_INTERRUPT) {
- dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
+ u32 seqno = i915_get_gem_seqno(dev);
+ dev_priv->mm.irq_gem_seqno = seqno;
+ trace_i915_gem_request_complete(dev, seqno);
DRM_WAKEUP(&dev_priv->irq_queue);
+ dev_priv->hangcheck_count = 0;
+ mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
}
if (pipea_stats & vblank_status) {
@@ -880,6 +910,52 @@ int i915_vblank_swap(struct drm_device *dev, void *data,
return -EINVAL;
}
+/*
+ * Returns the entry at the tail (->prev) of mm.request_list.
+ * No emptiness check is done here; i915_hangcheck_elapsed() tests
+ * list_empty() on the list before calling this.
+ */
+struct drm_i915_gem_request *i915_get_tail_request(struct drm_device *dev) {
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ return list_entry(dev_priv->mm.request_list.prev, struct drm_i915_gem_request, list);
+}
+
+/**
+ * This is called when the chip hasn't reported back with completed
+ * batchbuffers in a long time. The first time this is called we simply record
+ * ACTHD. If ACTHD hasn't changed by the time the hangcheck timer elapses
+ * again, we assume the chip is wedged and try to fix it.
+ *
+ * @data: the struct drm_device pointer, passed as an unsigned long.
+ */
+void i915_hangcheck_elapsed(unsigned long data)
+{
+ struct drm_device *dev = (struct drm_device *)data;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ uint32_t acthd;
+
+ /* ACTHD is read from a different register on I965G parts */
+ if (!IS_I965G(dev))
+ acthd = I915_READ(ACTHD);
+ else
+ acthd = I915_READ(ACTHD_I965);
+
+ /* If all work is done then ACTHD clearly hasn't advanced. */
+ if (list_empty(&dev_priv->mm.request_list) ||
+ i915_seqno_passed(i915_get_gem_seqno(dev), i915_get_tail_request(dev)->seqno)) {
+ dev_priv->hangcheck_count = 0;
+ /* the timer is not re-armed on this path */
+ return;
+ }
+
+ /* same ACTHD as the previous check and a non-zero count: report a hang */
+ if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) {
+ DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
+ i915_handle_error(dev, true);
+ return;
+ }
+
+ /* Reset timer in case chip hangs without another request being added */
+ mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
+
+ if (acthd != dev_priv->last_acthd)
+ dev_priv->hangcheck_count = 0;
+ else
+ dev_priv->hangcheck_count++;
+
+ dev_priv->last_acthd = acthd;
+}
+
/* drm_dma.h hooks
*/
static void igdng_irq_preinstall(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_opregion.c b/drivers/gpu/drm/i915/i915_opregion.c
index e4b4e8898e3..2d5193556d3 100644
--- a/drivers/gpu/drm/i915/i915_opregion.c
+++ b/drivers/gpu/drm/i915/i915_opregion.c
@@ -148,6 +148,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
struct drm_i915_private *dev_priv = dev->dev_private;
struct opregion_asle *asle = dev_priv->opregion.asle;
u32 blc_pwm_ctl, blc_pwm_ctl2;
+ u32 max_backlight, level, shift;
if (!(bclp & ASLE_BCLP_VALID))
return ASLE_BACKLIGHT_FAIL;
@@ -157,14 +158,25 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
return ASLE_BACKLIGHT_FAIL;
blc_pwm_ctl = I915_READ(BLC_PWM_CTL);
- blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK;
blc_pwm_ctl2 = I915_READ(BLC_PWM_CTL2);
- if (blc_pwm_ctl2 & BLM_COMBINATION_MODE)
+ if (IS_I965G(dev) && (blc_pwm_ctl2 & BLM_COMBINATION_MODE))
pci_write_config_dword(dev->pdev, PCI_LBPC, bclp);
- else
- I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | ((bclp * 0x101)-1));
-
+ else {
+ if (IS_IGD(dev)) {
+ blc_pwm_ctl &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1);
+ max_backlight = (blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >>
+ BACKLIGHT_MODULATION_FREQ_SHIFT;
+ shift = BACKLIGHT_DUTY_CYCLE_SHIFT + 1;
+ } else {
+ blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK;
+ max_backlight = ((blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >>
+ BACKLIGHT_MODULATION_FREQ_SHIFT) * 2;
+ shift = BACKLIGHT_DUTY_CYCLE_SHIFT;
+ }
+ level = (bclp * max_backlight) / 255;
+ I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | (level << shift));
+ }
asle->cblv = (bclp*0x64)/0xff | ASLE_CBLV_VALID;
return 0;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3f796355346..0466ddbeba3 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -86,6 +86,10 @@
#define I915_GC_RENDER_CLOCK_200_MHZ (1 << 0)
#define I915_GC_RENDER_CLOCK_333_MHZ (4 << 0)
#define LBB 0xf4
+#define GDRST 0xc0
+#define GDRST_FULL (0<<2)
+#define GDRST_RENDER (1<<2)
+#define GDRST_MEDIA (3<<2)
/* VGA stuff */
@@ -344,9 +348,37 @@
#define FBC_CTL_PLANEA (0<<0)
#define FBC_CTL_PLANEB (1<<0)
#define FBC_FENCE_OFF 0x0321b
+#define FBC_TAG 0x03300
#define FBC_LL_SIZE (1536)
+/* Framebuffer compression for GM45+ */
+#define DPFC_CB_BASE 0x3200
+#define DPFC_CONTROL 0x3208
+#define DPFC_CTL_EN (1<<31)
+#define DPFC_CTL_PLANEA (0<<30)
+#define DPFC_CTL_PLANEB (1<<30)
+#define DPFC_CTL_FENCE_EN (1<<29)
+#define DPFC_SR_EN (1<<10)
+#define DPFC_CTL_LIMIT_1X (0<<6)
+#define DPFC_CTL_LIMIT_2X (1<<6)
+#define DPFC_CTL_LIMIT_4X (2<<6)
+#define DPFC_RECOMP_CTL 0x320c
+#define DPFC_RECOMP_STALL_EN (1<<27)
+#define DPFC_RECOMP_STALL_WM_SHIFT (16)
+#define DPFC_RECOMP_STALL_WM_MASK (0x07ff0000)
+#define DPFC_RECOMP_TIMER_COUNT_SHIFT (0)
+#define DPFC_RECOMP_TIMER_COUNT_MASK (0x0000003f)
+#define DPFC_STATUS 0x3210
+#define DPFC_INVAL_SEG_SHIFT (16)
+#define DPFC_INVAL_SEG_MASK (0x07ff0000)
+#define DPFC_COMP_SEG_SHIFT (0)
+#define DPFC_COMP_SEG_MASK (0x000003ff)
+#define DPFC_STATUS2 0x3214
+#define DPFC_FENCE_YOFF 0x3218
+#define DPFC_CHICKEN 0x3224
+#define DPFC_HT_MODIFY (1<<31)
+
/*
* GPIO regs
*/
@@ -2000,6 +2032,8 @@
#define PF_ENABLE (1<<31)
#define PFA_WIN_SZ 0x68074
#define PFB_WIN_SZ 0x68874
+#define PFA_WIN_POS 0x68070
+#define PFB_WIN_POS 0x68870
/* legacy palette */
#define LGC_PALETTE_A 0x4a000
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 20d4d19f556..bd6d8d91ca9 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -228,6 +228,7 @@ static void i915_save_modeset_reg(struct drm_device *dev)
if (drm_core_check_feature(dev, DRIVER_MODESET))
return;
+
/* Pipe & plane A info */
dev_priv->savePIPEACONF = I915_READ(PIPEACONF);
dev_priv->savePIPEASRC = I915_READ(PIPEASRC);
@@ -285,6 +286,7 @@ static void i915_save_modeset_reg(struct drm_device *dev)
dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT);
return;
}
+
static void i915_restore_modeset_reg(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -379,19 +381,10 @@ static void i915_restore_modeset_reg(struct drm_device *dev)
return;
}
-int i915_save_state(struct drm_device *dev)
+
+void i915_save_display(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
- int i;
-
- pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB);
-
- /* Render Standby */
- if (IS_I965G(dev) && IS_MOBILE(dev))
- dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY);
-
- /* Hardware status page */
- dev_priv->saveHWS = I915_READ(HWS_PGA);
/* Display arbitration control */
dev_priv->saveDSPARB = I915_READ(DSPARB);
@@ -399,6 +392,7 @@ int i915_save_state(struct drm_device *dev)
/* This is only meaningful in non-KMS mode */
/* Don't save them in KMS mode */
i915_save_modeset_reg(dev);
+
/* Cursor state */
dev_priv->saveCURACNTR = I915_READ(CURACNTR);
dev_priv->saveCURAPOS = I915_READ(CURAPOS);
@@ -448,81 +442,22 @@ int i915_save_state(struct drm_device *dev)
dev_priv->saveFBC_CONTROL2 = I915_READ(FBC_CONTROL2);
dev_priv->saveFBC_CONTROL = I915_READ(FBC_CONTROL);
- /* Interrupt state */
- dev_priv->saveIIR = I915_READ(IIR);
- dev_priv->saveIER = I915_READ(IER);
- dev_priv->saveIMR = I915_READ(IMR);
-
/* VGA state */
dev_priv->saveVGA0 = I915_READ(VGA0);
dev_priv->saveVGA1 = I915_READ(VGA1);
dev_priv->saveVGA_PD = I915_READ(VGA_PD);
dev_priv->saveVGACNTRL = I915_READ(VGACNTRL);
- /* Clock gating state */
- dev_priv->saveD_STATE = I915_READ(D_STATE);
- dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D);
-
- /* Cache mode state */
- dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
-
- /* Memory Arbitration state */
- dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
-
- /* Scratch space */
- for (i = 0; i < 16; i++) {
- dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2));
- dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2));
- }
- for (i = 0; i < 3; i++)
- dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
-
- /* Fences */
- if (IS_I965G(dev)) {
- for (i = 0; i < 16; i++)
- dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
- } else {
- for (i = 0; i < 8; i++)
- dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
-
- if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
- for (i = 0; i < 8; i++)
- dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
- }
i915_save_vga(dev);
-
- return 0;
}
-int i915_restore_state(struct drm_device *dev)
+void i915_restore_display(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
- int i;
-
- pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
-
- /* Render Standby */
- if (IS_I965G(dev) && IS_MOBILE(dev))
- I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY);
-
- /* Hardware status page */
- I915_WRITE(HWS_PGA, dev_priv->saveHWS);
/* Display arbitration */
I915_WRITE(DSPARB, dev_priv->saveDSPARB);
- /* Fences */
- if (IS_I965G(dev)) {
- for (i = 0; i < 16; i++)
- I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
- } else {
- for (i = 0; i < 8; i++)
- I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
- if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
- for (i = 0; i < 8; i++)
- I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
- }
-
/* Display port ratios (must be done before clock is set) */
if (SUPPORTS_INTEGRATED_DP(dev)) {
I915_WRITE(PIPEA_GMCH_DATA_M, dev_priv->savePIPEA_GMCH_DATA_M);
@@ -534,9 +469,11 @@ int i915_restore_state(struct drm_device *dev)
I915_WRITE(PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N);
I915_WRITE(PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N);
}
+
/* This is only meaningful in non-KMS mode */
/* Don't restore them in KMS mode */
i915_restore_modeset_reg(dev);
+
/* Cursor state */
I915_WRITE(CURAPOS, dev_priv->saveCURAPOS);
I915_WRITE(CURACNTR, dev_priv->saveCURACNTR);
@@ -586,6 +523,95 @@ int i915_restore_state(struct drm_device *dev)
I915_WRITE(VGA_PD, dev_priv->saveVGA_PD);
DRM_UDELAY(150);
+ i915_restore_vga(dev);
+}
+
+/*
+ * Saves device register state into dev_priv->save* fields: the LBB PCI
+ * config byte, render standby (I965G mobile only), the hardware status
+ * page address, display state via i915_save_display(), IER/IMR, clock
+ * gating, cache mode, memory arbitration, scratch registers and fences.
+ * Always returns 0.
+ */
+int i915_save_state(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ int i;
+
+ pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB);
+
+ /* Render Standby */
+ if (IS_I965G(dev) && IS_MOBILE(dev))
+ dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY);
+
+ /* Hardware status page */
+ dev_priv->saveHWS = I915_READ(HWS_PGA);
+
+ i915_save_display(dev);
+
+ /* Interrupt state (only IER and IMR are saved here) */
+ dev_priv->saveIER = I915_READ(IER);
+ dev_priv->saveIMR = I915_READ(IMR);
+
+ /* Clock gating state */
+ dev_priv->saveD_STATE = I915_READ(D_STATE);
+ dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D); /* Not sure about this */
+
+ /* Cache mode state */
+ dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
+
+ /* Memory Arbitration state */
+ dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
+
+ /* Scratch space */
+ for (i = 0; i < 16; i++) {
+ dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2));
+ dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2));
+ }
+ for (i = 0; i < 3; i++)
+ dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
+
+ /* Fences: 16 64-bit registers on I965G; otherwise 8, plus 8 more on 945G/945GM/G33 */
+ if (IS_I965G(dev)) {
+ for (i = 0; i < 16; i++)
+ dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
+ } else {
+ for (i = 0; i < 8; i++)
+ dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
+
+ if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+ for (i = 0; i < 8; i++)
+ dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
+ }
+
+ return 0;
+}
+
+int i915_restore_state(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ int i;
+
+ pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
+
+ /* Render Standby */
+ if (IS_I965G(dev) && IS_MOBILE(dev))
+ I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY);
+
+ /* Hardware status page */
+ I915_WRITE(HWS_PGA, dev_priv->saveHWS);
+
+ /* Fences */
+ if (IS_I965G(dev)) {
+ for (i = 0; i < 16; i++)
+ I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
+ } else {
+ for (i = 0; i < 8; i++)
+ I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
+ if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+ for (i = 0; i < 8; i++)
+ I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
+ }
+
+ i915_restore_display(dev);
+
+ /* Interrupt state */
+ I915_WRITE (IER, dev_priv->saveIER);
+ I915_WRITE (IMR, dev_priv->saveIMR);
+
/* Clock gating state */
I915_WRITE (D_STATE, dev_priv->saveD_STATE);
I915_WRITE (DSPCLK_GATE_D, dev_priv->saveDSPCLK_GATE_D);
@@ -603,8 +629,6 @@ int i915_restore_state(struct drm_device *dev)
for (i = 0; i < 3; i++)
I915_WRITE(SWF30 + (i << 2), dev_priv->saveSWF2[i]);
- i915_restore_vga(dev);
-
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
new file mode 100644
index 00000000000..5567a40816f
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -0,0 +1,315 @@
+#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _I915_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include <drm/drmP.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM i915
+#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
+#define TRACE_INCLUDE_FILE i915_trace
+
+/* object tracking */
+
+/* Records the object pointer and obj->size (stored as u32) for a new GEM object. */
+TRACE_EVENT(i915_gem_object_create,
+
+ TP_PROTO(struct drm_gem_object *obj),
+
+ TP_ARGS(obj),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ __field(u32, size)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ __entry->size = obj->size;
+ ),
+
+ TP_printk("obj=%p, size=%u", __entry->obj, __entry->size)
+);
+
+/* Records the object pointer and the gtt_offset supplied by the caller. */
+TRACE_EVENT(i915_gem_object_bind,
+
+ TP_PROTO(struct drm_gem_object *obj, u32 gtt_offset),
+
+ TP_ARGS(obj, gtt_offset),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ __field(u32, gtt_offset)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ __entry->gtt_offset = gtt_offset;
+ ),
+
+ TP_printk("obj=%p, gtt_offset=%08x",
+ __entry->obj, __entry->gtt_offset)
+);
+
+/* Records only the object pointer. */
+TRACE_EVENT(i915_gem_object_clflush,
+
+ TP_PROTO(struct drm_gem_object *obj),
+
+ TP_ARGS(obj),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ ),
+
+ TP_printk("obj=%p", __entry->obj)
+);
+
+/*
+ * Records a domain transition. Each u32 field holds the object's current
+ * value OR'ed with the caller-supplied old value shifted left by 16.
+ * The %04x in the format is a minimum width, so wider values print in full.
+ */
+TRACE_EVENT(i915_gem_object_change_domain,
+
+ TP_PROTO(struct drm_gem_object *obj, uint32_t old_read_domains, uint32_t old_write_domain),
+
+ TP_ARGS(obj, old_read_domains, old_write_domain),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ __field(u32, read_domains)
+ __field(u32, write_domain)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ __entry->read_domains = obj->read_domains | (old_read_domains << 16);
+ __entry->write_domain = obj->write_domain | (old_write_domain << 16);
+ ),
+
+ TP_printk("obj=%p, read=%04x, write=%04x",
+ __entry->obj,
+ __entry->read_domains, __entry->write_domain)
+);
+
+/* Records the object pointer together with the fence and tiling_mode values passed in. */
+TRACE_EVENT(i915_gem_object_get_fence,
+
+ TP_PROTO(struct drm_gem_object *obj, int fence, int tiling_mode),
+
+ TP_ARGS(obj, fence, tiling_mode),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ __field(int, fence)
+ __field(int, tiling_mode)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ __entry->fence = fence;
+ __entry->tiling_mode = tiling_mode;
+ ),
+
+ TP_printk("obj=%p, fence=%d, tiling=%d",
+ __entry->obj, __entry->fence, __entry->tiling_mode)
+);
+
+/* Records only the object pointer. */
+TRACE_EVENT(i915_gem_object_unbind,
+
+ TP_PROTO(struct drm_gem_object *obj),
+
+ TP_ARGS(obj),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ ),
+
+ TP_printk("obj=%p", __entry->obj)
+);
+
+/* Records only the object pointer of the object being destroyed. */
+TRACE_EVENT(i915_gem_object_destroy,
+
+ TP_PROTO(struct drm_gem_object *obj),
+
+ TP_ARGS(obj),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ ),
+
+ TP_printk("obj=%p", __entry->obj)
+);
+
+/* batch tracing */
+
+/* Records the device pointer and the request seqno. */
+TRACE_EVENT(i915_gem_request_submit,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno),
+
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+/* Records the device pointer, seqno, and the flush and invalidate domain masks passed in. */
+TRACE_EVENT(i915_gem_request_flush,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno,
+ u32 flush_domains, u32 invalidate_domains),
+
+ TP_ARGS(dev, seqno, flush_domains, invalidate_domains),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ __field(u32, flush_domains)
+ __field(u32, invalidate_domains)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ __entry->flush_domains = flush_domains;
+ __entry->invalidate_domains = invalidate_domains;
+ ),
+
+ TP_printk("dev=%p, seqno=%u, flush=%04x, invalidate=%04x",
+ __entry->dev, __entry->seqno,
+ __entry->flush_domains, __entry->invalidate_domains)
+);
+
+
+/* Records the device pointer and the completed seqno. */
+TRACE_EVENT(i915_gem_request_complete,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno),
+
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+/* Records the device pointer and the seqno of the request. */
+TRACE_EVENT(i915_gem_request_retire,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno),
+
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+/* Records the device pointer and the seqno; pairs by name with i915_gem_request_wait_end. */
+TRACE_EVENT(i915_gem_request_wait_begin,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno),
+
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+/* Records the device pointer and the seqno; pairs by name with i915_gem_request_wait_begin. */
+TRACE_EVENT(i915_gem_request_wait_end,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno),
+
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+/* Records only the device pointer; pairs by name with i915_ring_wait_end. */
+TRACE_EVENT(i915_ring_wait_begin,
+
+ TP_PROTO(struct drm_device *dev),
+
+ TP_ARGS(dev),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ ),
+
+ TP_printk("dev=%p", __entry->dev)
+);
+
+/* Records only the device pointer; pairs by name with i915_ring_wait_begin. */
+TRACE_EVENT(i915_ring_wait_end,
+
+ TP_PROTO(struct drm_device *dev),
+
+ TP_ARGS(dev),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ ),
+
+ TP_printk("dev=%p", __entry->dev)
+);
+
+#endif /* _I915_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/i915/i915_trace_points.c b/drivers/gpu/drm/i915/i915_trace_points.c
new file mode 100644
index 00000000000..ead876eb6ea
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_trace_points.c
@@ -0,0 +1,11 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ */
+
+#include "i915_drv.h"
+
+#define CREATE_TRACE_POINTS
+#include "i915_trace.h"
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 1e28c1652fd..4337414846b 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -217,6 +217,9 @@ parse_general_features(struct drm_i915_private *dev_priv,
if (IS_I85X(dev_priv->dev))
dev_priv->lvds_ssc_freq =
general->ssc_freq ? 66 : 48;
+ else if (IS_IGDNG(dev_priv->dev))
+ dev_priv->lvds_ssc_freq =
+ general->ssc_freq ? 100 : 120;
else
dev_priv->lvds_ssc_freq =
general->ssc_freq ? 100 : 96;
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 88814fa2dfd..212e22740fc 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -179,13 +179,10 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector)
{
struct drm_device *dev = connector->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- u32 adpa, temp;
+ u32 adpa;
bool ret;
- temp = adpa = I915_READ(PCH_ADPA);
-
- adpa &= ~ADPA_DAC_ENABLE;
- I915_WRITE(PCH_ADPA, adpa);
+ adpa = I915_READ(PCH_ADPA);
adpa &= ~ADPA_CRT_HOTPLUG_MASK;
@@ -212,8 +209,6 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector)
else
ret = false;
- /* restore origin register */
- I915_WRITE(PCH_ADPA, temp);
return ret;
}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 0227b165290..93ff6c03733 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -24,6 +24,8 @@
* Eric Anholt <eric@anholt.net>
*/
+#include <linux/module.h>
+#include <linux/input.h>
#include <linux/i2c.h>
#include <linux/kernel.h>
#include "drmP.h"
@@ -875,7 +877,7 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
refclk, best_clock);
if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
- if ((I915_READ(LVDS) & LVDS_CLKB_POWER_MASK) ==
+ if ((I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) ==
LVDS_CLKB_POWER_UP)
clock.p2 = limit->p2.p2_fast;
else
@@ -952,6 +954,241 @@ intel_wait_for_vblank(struct drm_device *dev)
mdelay(20);
}
+/* Parameters have changed, update FBC info: program and enable the FBC_CONTROL-style compressor for @crtc */
+static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_framebuffer *fb = crtc->fb;
+ struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+ struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private;
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int plane, i;
+ u32 fbc_ctl, fbc_ctl2;
+
+ dev_priv->cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE;
+
+ if (fb->pitch < dev_priv->cfb_pitch) /* never use more than the fb's own pitch */
+ dev_priv->cfb_pitch = fb->pitch;
+
+ /* FBC_CTL wants 64B units */
+ dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
+ dev_priv->cfb_fence = obj_priv->fence_reg;
+ dev_priv->cfb_plane = intel_crtc->plane;
+ plane = dev_priv->cfb_plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;
+
+ /* Clear old tags */
+ for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
+ I915_WRITE(FBC_TAG + (i * 4), 0);
+
+ /* Set it up... */
+ fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | plane;
+ if (obj_priv->tiling_mode != I915_TILING_NONE)
+ fbc_ctl2 |= FBC_CTL_CPU_FENCE;
+ I915_WRITE(FBC_CONTROL2, fbc_ctl2);
+ I915_WRITE(FBC_FENCE_OFF, crtc->y);
+
+ /* enable it... */
+ fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC;
+ fbc_ctl |= (dev_priv->cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
+ fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT; /* NOTE(review): 0x2fff leaves bit 12 out of the mask - confirm vs 0x3fff */
+ if (obj_priv->tiling_mode != I915_TILING_NONE)
+ fbc_ctl |= dev_priv->cfb_fence;
+ I915_WRITE(FBC_CONTROL, fbc_ctl);
+
+ DRM_DEBUG("enabled FBC, pitch %ld, yoff %d, plane %d\n",
+ dev_priv->cfb_pitch, crtc->y, dev_priv->cfb_plane);
+}
+
+void i8xx_disable_fbc(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ u32 fbc_ctl, tries = 1000; /* poll budget: about 10ms at 10us per try */
+
+ if (!I915_HAS_FBC(dev)) /* nothing to turn off on chips without FBC */
+ return;
+
+ /* Disable compression */
+ fbc_ctl = I915_READ(FBC_CONTROL);
+ fbc_ctl &= ~FBC_CTL_EN;
+ I915_WRITE(FBC_CONTROL, fbc_ctl);
+
+ /* Wait for compressing bit to clear, but never spin forever on stuck hw */
+ while ((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) && --tries)
+ udelay(10);
+
+ intel_wait_for_vblank(dev);
+
+ DRM_DEBUG("disabled FBC\n");
+}
+
+static bool i8xx_fbc_enabled(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private; /* presumably consumed by the I915_READ macro - confirm */
+ /* FBC counts as enabled when FBC_CTL_EN is set in FBC_CONTROL */
+ return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
+}
+
+static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_framebuffer *fb = crtc->fb;
+ struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+ struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private;
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int plane = (intel_crtc->plane == 0 ? DPFC_CTL_PLANEA :
+ DPFC_CTL_PLANEB);
+ unsigned long stall_watermark = 200;
+ u32 dpfc_ctl;
+ /* Program and enable the DPFC_CONTROL-style compressor for @crtc; record the state intel_update_fbc() compares against */
+ dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1; /* NOTE(review): rescales the stored value itself, not fb->pitch - confirm */
+ dev_priv->cfb_fence = obj_priv->fence_reg;
+ dev_priv->cfb_plane = intel_crtc->plane;
+
+ dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X;
+ if (obj_priv->tiling_mode != I915_TILING_NONE) {
+ dpfc_ctl |= DPFC_CTL_FENCE_EN | dev_priv->cfb_fence; /* tiled fb: enable fencing with the object's fence register */
+ I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);
+ } else {
+ I915_WRITE(DPFC_CHICKEN, ~DPFC_HT_MODIFY); /* NOTE(review): writes every bit except DPFC_HT_MODIFY - confirm intent */
+ }
+
+ I915_WRITE(DPFC_CONTROL, dpfc_ctl); /* configuration first, DPFC_CTL_EN is added last below */
+ I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
+ (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
+ (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
+ I915_WRITE(DPFC_FENCE_YOFF, crtc->y);
+
+ /* enable it... */
+ I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN);
+
+ DRM_DEBUG("enabled fbc on plane %d\n", intel_crtc->plane);
+}
+
+void g4x_disable_fbc(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ u32 dpfc_ctl;
+ /* NOTE(review): unlike i8xx_disable_fbc() there is no I915_HAS_FBC() guard here - confirm callers only install this on FBC-capable chips */
+ /* Disable compression: clear only DPFC_CTL_EN and write the rest back unchanged */
+ dpfc_ctl = I915_READ(DPFC_CONTROL);
+ dpfc_ctl &= ~DPFC_CTL_EN;
+ I915_WRITE(DPFC_CONTROL, dpfc_ctl);
+ intel_wait_for_vblank(dev); /* presumably lets the disable latch before callers continue - confirm */
+
+ DRM_DEBUG("disabled FBC\n");
+}
+
+static bool g4x_fbc_enabled(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private; /* presumably consumed by the I915_READ macro - confirm */
+ /* FBC counts as enabled when DPFC_CTL_EN is set in DPFC_CONTROL */
+ return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
+}
+
+/**
+ * intel_update_fbc - enable/disable FBC as needed
+ * @crtc: CRTC to point the compressor at
+ * @mode: mode in use
+ *
+ * Set up the framebuffer compression hardware at mode set time. We
+ * enable it if possible:
+ * - plane A only (on pre-965)
+ * - no pixel multiply/line duplication
+ * - no alpha buffer discard
+ * - no dual wide
+ * - framebuffer <= 2048 in width, 1536 in height
+ *
+ * We can't assume that any compression will take place (worst case),
+ * so the compressed buffer has to be the same size as the uncompressed
+ * one. It also must reside (along with the line length buffer) in
+ * stolen memory.
+ *
+ * We need to enable/disable FBC on a global basis.
+ */
+static void intel_update_fbc(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_framebuffer *fb = crtc->fb;
+ struct intel_framebuffer *intel_fb;
+ struct drm_i915_gem_object *obj_priv;
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int plane = intel_crtc->plane;
+
+ if (!i915_powersave)
+ return;
+
+ if (!dev_priv->display.fbc_enabled ||
+ !dev_priv->display.enable_fbc ||
+ !dev_priv->display.disable_fbc)
+ return;
+
+ if (!crtc->fb)
+ return;
+
+ intel_fb = to_intel_framebuffer(fb);
+ obj_priv = intel_fb->obj->driver_private;
+
+ /*
+ * If FBC is already on, we just have to verify that we can
+ * keep it that way...
+ * Need to disable if:
+ * - changing FBC params (stride, fence, mode)
+ * - new fb is too large to fit in compressed buffer
+ * - going to an unsupported config (interlace, pixel multiply, etc.)
+ */
+ if (intel_fb->obj->size > dev_priv->cfb_size) {
+ DRM_DEBUG("framebuffer too large, disabling compression\n");
+ goto out_disable;
+ }
+ if ((mode->flags & DRM_MODE_FLAG_INTERLACE) ||
+ (mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
+ DRM_DEBUG("mode incompatible with compression, disabling\n");
+ goto out_disable;
+ }
+ if ((mode->hdisplay > 2048) ||
+ (mode->vdisplay > 1536)) {
+ DRM_DEBUG("mode too large for compression, disabling\n");
+ goto out_disable;
+ }
+ if ((IS_I915GM(dev) || IS_I945GM(dev)) && plane != 0) {
+ DRM_DEBUG("plane not 0, disabling compression\n");
+ goto out_disable;
+ }
+ if (obj_priv->tiling_mode != I915_TILING_X) {
+ DRM_DEBUG("framebuffer not tiled, disabling compression\n");
+ goto out_disable;
+ }
+
+ if (dev_priv->display.fbc_enabled(crtc)) {
+ /*
+ * Params went stale: disable once (each disable waits a vblank).
+ */
+ if (fb->pitch > dev_priv->cfb_pitch ||
+ obj_priv->fence_reg != dev_priv->cfb_fence ||
+ plane != dev_priv->cfb_plane)
+ dev_priv->display.disable_fbc(dev);
+ }
+
+ if (!dev_priv->display.fbc_enabled(crtc)) {
+ /* Now try to turn it back on if possible */
+ dev_priv->display.enable_fbc(crtc, 500);
+ }
+
+ return;
+
+out_disable:
+ DRM_DEBUG("unsupported config, disabling FBC\n");
+ /* Multiple disables should be harmless */
+ if (dev_priv->display.fbc_enabled(crtc))
+ dev_priv->display.disable_fbc(dev);
+}
+
static int
intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
struct drm_framebuffer *old_fb)
@@ -964,12 +1201,13 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
struct drm_i915_gem_object *obj_priv;
struct drm_gem_object *obj;
int pipe = intel_crtc->pipe;
+ int plane = intel_crtc->plane;
unsigned long Start, Offset;
- int dspbase = (pipe == 0 ? DSPAADDR : DSPBADDR);
- int dspsurf = (pipe == 0 ? DSPASURF : DSPBSURF);
- int dspstride = (pipe == 0) ? DSPASTRIDE : DSPBSTRIDE;
- int dsptileoff = (pipe == 0 ? DSPATILEOFF : DSPBTILEOFF);
- int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
+ int dspbase = (plane == 0 ? DSPAADDR : DSPBADDR);
+ int dspsurf = (plane == 0 ? DSPASURF : DSPBSURF);
+ int dspstride = (plane == 0) ? DSPASTRIDE : DSPBSTRIDE;
+ int dsptileoff = (plane == 0 ? DSPATILEOFF : DSPBTILEOFF);
+ int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
u32 dspcntr, alignment;
int ret;
@@ -979,12 +1217,12 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
return 0;
}
- switch (pipe) {
+ switch (plane) {
case 0:
case 1:
break;
default:
- DRM_ERROR("Can't update pipe %d in SAREA\n", pipe);
+ DRM_ERROR("Can't update plane %d in SAREA\n", plane);
return -EINVAL;
}
@@ -1086,6 +1324,9 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
I915_READ(dspbase);
}
+ if ((IS_I965G(dev) || plane == 0))
+ intel_update_fbc(crtc, &crtc->mode);
+
intel_wait_for_vblank(dev);
if (old_fb) {
@@ -1217,6 +1458,7 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode)
int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF;
int pf_ctl_reg = (pipe == 0) ? PFA_CTL_1 : PFB_CTL_1;
int pf_win_size = (pipe == 0) ? PFA_WIN_SZ : PFB_WIN_SZ;
+ int pf_win_pos = (pipe == 0) ? PFA_WIN_POS : PFB_WIN_POS;
int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
int cpu_hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B;
@@ -1268,6 +1510,19 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode)
}
}
+ /* Enable panel fitting for LVDS */
+ if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
+ temp = I915_READ(pf_ctl_reg);
+ I915_WRITE(pf_ctl_reg, temp | PF_ENABLE);
+
+ /* currently full aspect */
+ I915_WRITE(pf_win_pos, 0);
+
+ I915_WRITE(pf_win_size,
+ (dev_priv->panel_fixed_mode->hdisplay << 16) |
+ (dev_priv->panel_fixed_mode->vdisplay));
+ }
+
/* Enable CPU pipe */
temp = I915_READ(pipeconf_reg);
if ((temp & PIPEACONF_ENABLE) == 0) {
@@ -1532,9 +1787,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
int pipe = intel_crtc->pipe;
+ int plane = intel_crtc->plane;
int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
- int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
- int dspbase_reg = (pipe == 0) ? DSPAADDR : DSPBADDR;
+ int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
+ int dspbase_reg = (plane == 0) ? DSPAADDR : DSPBADDR;
int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
u32 temp;
@@ -1577,6 +1833,9 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
intel_crtc_load_lut(crtc);
+ if ((IS_I965G(dev) || plane == 0))
+ intel_update_fbc(crtc, &crtc->mode);
+
/* Give the overlay scaler a chance to enable if it's on this pipe */
//intel_crtc_dpms_video(crtc, true); TODO
intel_update_watermarks(dev);
@@ -1586,6 +1845,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
/* Give the overlay scaler a chance to disable if it's on this pipe */
//intel_crtc_dpms_video(crtc, FALSE); TODO
+ if (dev_priv->cfb_plane == plane &&
+ dev_priv->display.disable_fbc)
+ dev_priv->display.disable_fbc(dev);
+
/* Disable the VGA plane that we never use */
i915_disable_vga(dev);
@@ -1634,15 +1897,13 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
static void intel_crtc_dpms(struct drm_crtc *crtc, int mode)
{
struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_master_private *master_priv;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
int pipe = intel_crtc->pipe;
bool enabled;
- if (IS_IGDNG(dev))
- igdng_crtc_dpms(crtc, mode);
- else
- i9xx_crtc_dpms(crtc, mode);
+ dev_priv->display.dpms(crtc, mode);
intel_crtc->dpms_mode = mode;
@@ -1709,56 +1970,68 @@ static bool intel_crtc_mode_fixup(struct drm_crtc *crtc,
return true;
}
+static int i945_get_display_clock_speed(struct drm_device *dev)
+{
+ return 400000; /* fixed value, @dev is not consulted; presumably kHz (400 MHz) - TODO confirm units */
+}
-/** Returns the core display clock speed for i830 - i945 */
-static int intel_get_core_clock_speed(struct drm_device *dev)
+static int i915_get_display_clock_speed(struct drm_device *dev)
{
+ return 333000;
+}
- /* Core clock values taken from the published datasheets.
- * The 830 may go up to 166 Mhz, which we should check.
- */
- if (IS_I945G(dev))
- return 400000;
- else if (IS_I915G(dev))
- return 333000;
- else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev))
- return 200000;
- else if (IS_I915GM(dev)) {
- u16 gcfgc = 0;
+static int i9xx_misc_get_display_clock_speed(struct drm_device *dev)
+{
+ return 200000; /* fixed value, @dev is not consulted; presumably kHz (200 MHz) - TODO confirm units */
+}
- pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+static int i915gm_get_display_clock_speed(struct drm_device *dev)
+{
+ u16 gcfgc = 0;
- if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
- return 133000;
- else {
- switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
- case GC_DISPLAY_CLOCK_333_MHZ:
- return 333000;
- default:
- case GC_DISPLAY_CLOCK_190_200_MHZ:
- return 190000;
- }
- }
- } else if (IS_I865G(dev))
- return 266000;
- else if (IS_I855(dev)) {
- u16 hpllcc = 0;
- /* Assume that the hardware is in the high speed state. This
- * should be the default.
- */
- switch (hpllcc & GC_CLOCK_CONTROL_MASK) {
- case GC_CLOCK_133_200:
- case GC_CLOCK_100_200:
- return 200000;
- case GC_CLOCK_166_250:
- return 250000;
- case GC_CLOCK_100_133:
- return 133000;
+ pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+
+ if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
+ return 133000;
+ else {
+ switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
+ case GC_DISPLAY_CLOCK_333_MHZ:
+ return 333000;
+ default:
+ case GC_DISPLAY_CLOCK_190_200_MHZ:
+ return 190000;
}
- } else /* 852, 830 */
+ }
+}
+
+static int i865_get_display_clock_speed(struct drm_device *dev)
+{
+ return 266000; /* fixed value, @dev is not consulted; presumably kHz (266 MHz) - TODO confirm units */
+}
+
+static int i855_get_display_clock_speed(struct drm_device *dev)
+{
+ u16 hpllcc = 0; /* never read from hardware here: the switch below always sees 0 */
+ /* Assume that the hardware is in the high speed state. This
+ * should be the default.
+ */
+ switch (hpllcc & GC_CLOCK_CONTROL_MASK) {
+ case GC_CLOCK_133_200:
+ case GC_CLOCK_100_200:
+ return 200000;
+ case GC_CLOCK_166_250:
+ return 250000;
+ case GC_CLOCK_100_133:
+ return 133000;
+ }
+
+ /* Shouldn't happen: reached only when no case label matches the masked value */
+ return 0;
+}
- return 0; /* Silence gcc warning */
+static int i830_get_display_clock_speed(struct drm_device *dev)
+{
+ return 133000;
}
/**
@@ -1921,7 +2194,14 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
{
long entries_required, wm_size;
- entries_required = (clock_in_khz * pixel_size * latency_ns) / 1000000;
+ /*
+ * Note: we need to make sure we don't overflow for various clock &
+ * latency values.
+ * clocks go from a few thousand to several hundred thousand.
+ * latency is usually a few thousand
+ */
+ entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
+ 1000;
entries_required /= wm->cacheline_size;
DRM_DEBUG("FIFO entries required for mode: %d\n", entries_required);
@@ -1986,14 +2266,13 @@ static struct cxsr_latency *intel_get_cxsr_latency(int is_desktop, int fsb,
for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
latency = &cxsr_latency_table[i];
if (is_desktop == latency->is_desktop &&
- fsb == latency->fsb_freq && mem == latency->mem_freq)
- break;
+ fsb == latency->fsb_freq && mem == latency->mem_freq)
+ return latency;
}
- if (i >= ARRAY_SIZE(cxsr_latency_table)) {
- DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n");
- return NULL;
- }
- return latency;
+
+ DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n");
+
+ return NULL;
}
static void igd_disable_cxsr(struct drm_device *dev)
@@ -2084,32 +2363,36 @@ static void igd_enable_cxsr(struct drm_device *dev, unsigned long clock,
*/
const static int latency_ns = 5000;
-static int intel_get_fifo_size(struct drm_device *dev, int plane)
+static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t dsparb = I915_READ(DSPARB);
int size;
- if (IS_I9XX(dev)) {
- if (plane == 0)
- size = dsparb & 0x7f;
- else
- size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) -
- (dsparb & 0x7f);
- } else if (IS_I85X(dev)) {
- if (plane == 0)
- size = dsparb & 0x1ff;
- else
- size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) -
- (dsparb & 0x1ff);
- size >>= 1; /* Convert to cachelines */
- } else if (IS_845G(dev)) {
+ if (plane == 0)
size = dsparb & 0x7f;
- size >>= 2; /* Convert to cachelines */
- } else {
- size = dsparb & 0x7f;
- size >>= 1; /* Convert to cachelines */
- }
+ else
+ size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) -
+ (dsparb & 0x7f);
+
+ DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+ size);
+
+ return size;
+}
+
+static int i85x_get_fifo_size(struct drm_device *dev, int plane)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ uint32_t dsparb = I915_READ(DSPARB);
+ int size;
+
+ if (plane == 0)
+ size = dsparb & 0x1ff;
+ else
+ size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) -
+ (dsparb & 0x1ff);
+ size >>= 1; /* Convert to cachelines */
DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
size);
@@ -2117,7 +2400,38 @@ static int intel_get_fifo_size(struct drm_device *dev, int plane)
return size;
}
-static void g4x_update_wm(struct drm_device *dev)
+static int i845_get_fifo_size(struct drm_device *dev, int plane)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ uint32_t dsparb = I915_READ(DSPARB);
+ int size;
+ /* Same low 7 DSPARB bits whichever plane is asked for; @plane only labels the debug line */
+ size = dsparb & 0x7f;
+ size >>= 2; /* Convert to cachelines */
+
+ DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+ size);
+
+ return size;
+}
+
+static int i830_get_fifo_size(struct drm_device *dev, int plane)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ uint32_t dsparb = I915_READ(DSPARB);
+ int size;
+ /* Same low 7 DSPARB bits whichever plane is asked for; @plane only labels the debug line */
+ size = dsparb & 0x7f;
+ size >>= 1; /* Convert to cachelines */
+
+ DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+ size);
+
+ return size;
+}
+
+static void g4x_update_wm(struct drm_device *dev, int unused, int unused2,
+ int unused3, int unused4)
{
struct drm_i915_private *dev_priv = dev->dev_private;
u32 fw_blc_self = I915_READ(FW_BLC_SELF);
@@ -2129,7 +2443,8 @@ static void g4x_update_wm(struct drm_device *dev)
I915_WRITE(FW_BLC_SELF, fw_blc_self);
}
-static void i965_update_wm(struct drm_device *dev)
+static void i965_update_wm(struct drm_device *dev, int unused, int unused2,
+ int unused3, int unused4)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2165,8 +2480,8 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
cacheline_size = planea_params.cacheline_size;
/* Update per-plane FIFO sizes */
- planea_params.fifo_size = intel_get_fifo_size(dev, 0);
- planeb_params.fifo_size = intel_get_fifo_size(dev, 1);
+ planea_params.fifo_size = dev_priv->display.get_fifo_size(dev, 0);
+ planeb_params.fifo_size = dev_priv->display.get_fifo_size(dev, 1);
planea_wm = intel_calculate_wm(planea_clock, &planea_params,
pixel_size, latency_ns);
@@ -2213,14 +2528,14 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
I915_WRITE(FW_BLC2, fwater_hi);
}
-static void i830_update_wm(struct drm_device *dev, int planea_clock,
- int pixel_size)
+static void i830_update_wm(struct drm_device *dev, int planea_clock, int unused,
+ int unused2, int pixel_size)
{
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t fwater_lo = I915_READ(FW_BLC) & ~0xfff;
int planea_wm;
- i830_wm_info.fifo_size = intel_get_fifo_size(dev, 0);
+ i830_wm_info.fifo_size = dev_priv->display.get_fifo_size(dev, 0);
planea_wm = intel_calculate_wm(planea_clock, &i830_wm_info,
pixel_size, latency_ns);
@@ -2264,6 +2579,7 @@ static void i830_update_wm(struct drm_device *dev, int planea_clock,
*/
static void intel_update_watermarks(struct drm_device *dev)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_crtc *crtc;
struct intel_crtc *intel_crtc;
int sr_hdisplay = 0;
@@ -2302,15 +2618,8 @@ static void intel_update_watermarks(struct drm_device *dev)
else if (IS_IGD(dev))
igd_disable_cxsr(dev);
- if (IS_G4X(dev))
- g4x_update_wm(dev);
- else if (IS_I965G(dev))
- i965_update_wm(dev);
- else if (IS_I9XX(dev) || IS_MOBILE(dev))
- i9xx_update_wm(dev, planea_clock, planeb_clock, sr_hdisplay,
- pixel_size);
- else
- i830_update_wm(dev, planea_clock, pixel_size);
+ dev_priv->display.update_wm(dev, planea_clock, planeb_clock,
+ sr_hdisplay, pixel_size);
}
static int intel_crtc_mode_set(struct drm_crtc *crtc,
@@ -2323,10 +2632,11 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
int pipe = intel_crtc->pipe;
+ int plane = intel_crtc->plane;
int fp_reg = (pipe == 0) ? FPA0 : FPB0;
int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
int dpll_md_reg = (intel_crtc->pipe == 0) ? DPLL_A_MD : DPLL_B_MD;
- int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
+ int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
int htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
int hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
@@ -2334,8 +2644,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
int vtot_reg = (pipe == 0) ? VTOTAL_A : VTOTAL_B;
int vblank_reg = (pipe == 0) ? VBLANK_A : VBLANK_B;
int vsync_reg = (pipe == 0) ? VSYNC_A : VSYNC_B;
- int dspsize_reg = (pipe == 0) ? DSPASIZE : DSPBSIZE;
- int dsppos_reg = (pipe == 0) ? DSPAPOS : DSPBPOS;
+ int dspsize_reg = (plane == 0) ? DSPASIZE : DSPBSIZE;
+ int dsppos_reg = (plane == 0) ? DSPAPOS : DSPBPOS;
int pipesrc_reg = (pipe == 0) ? PIPEASRC : PIPEBSRC;
int refclk, num_outputs = 0;
intel_clock_t clock, reduced_clock;
@@ -2568,7 +2878,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
enable color space conversion */
if (!IS_IGDNG(dev)) {
if (pipe == 0)
- dspcntr |= DISPPLANE_SEL_PIPE_A;
+ dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
else
dspcntr |= DISPPLANE_SEL_PIPE_B;
}
@@ -2580,7 +2890,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
* XXX: No double-wide on 915GM pipe B. Is that the only reason for the
* pipe == 0 check?
*/
- if (mode->clock > intel_get_core_clock_speed(dev) * 9 / 10)
+ if (mode->clock >
+ dev_priv->display.get_display_clock_speed(dev) * 9 / 10)
pipeconf |= PIPEACONF_DOUBLE_WIDE;
else
pipeconf &= ~PIPEACONF_DOUBLE_WIDE;
@@ -2652,9 +2963,12 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
udelay(150);
if (IS_I965G(dev) && !IS_IGDNG(dev)) {
- sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
- I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
+ if (is_sdvo) {
+ sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
+ I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT));
+ } else
+ I915_WRITE(dpll_md_reg, 0);
} else {
/* write it again -- the BIOS does, after all */
I915_WRITE(dpll_reg, dpll);
@@ -2734,6 +3048,9 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
/* Flush the plane changes */
ret = intel_pipe_set_base(crtc, x, y, old_fb);
+ if ((IS_I965G(dev) || plane == 0))
+ intel_update_fbc(crtc, &crtc->mode);
+
intel_update_watermarks(dev);
drm_vblank_post_modeset(dev, pipe);
@@ -2778,6 +3095,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
struct drm_gem_object *bo;
struct drm_i915_gem_object *obj_priv;
int pipe = intel_crtc->pipe;
+ int plane = intel_crtc->plane;
uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR;
uint32_t base = (pipe == 0) ? CURABASE : CURBBASE;
uint32_t temp = I915_READ(control);
@@ -2863,6 +3181,10 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
i915_gem_object_unpin(intel_crtc->cursor_bo);
drm_gem_object_unreference(intel_crtc->cursor_bo);
}
+
+ if ((IS_I965G(dev) || plane == 0))
+ intel_update_fbc(crtc, &crtc->mode);
+
mutex_unlock(&dev->struct_mutex);
intel_crtc->cursor_addr = addr;
@@ -3544,6 +3866,14 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
intel_crtc->lut_b[i] = i;
}
+ /* Swap pipes & planes for FBC on pre-965 */
+ intel_crtc->pipe = pipe;
+ intel_crtc->plane = pipe;
+ if (IS_MOBILE(dev) && (IS_I9XX(dev) && !IS_I965G(dev))) {
+ DRM_DEBUG("swapping pipes & planes for FBC\n");
+ intel_crtc->plane = ((pipe == 0) ? 1 : 0);
+ }
+
intel_crtc->cursor_addr = 0;
intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF;
drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
@@ -3826,6 +4156,73 @@ void intel_init_clock_gating(struct drm_device *dev)
}
}
+/* Set up chip specific display functions: fill dev_priv->display with the per-chip hooks */
+static void intel_init_display(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ /* We always want a DPMS function */
+ if (IS_IGDNG(dev))
+ dev_priv->display.dpms = igdng_crtc_dpms;
+ else
+ dev_priv->display.dpms = i9xx_crtc_dpms;
+
+ /* Only mobile has FBC, leave pointers NULL for other chips */
+ if (IS_MOBILE(dev)) {
+ if (IS_GM45(dev)) {
+ dev_priv->display.fbc_enabled = g4x_fbc_enabled;
+ dev_priv->display.enable_fbc = g4x_enable_fbc;
+ dev_priv->display.disable_fbc = g4x_disable_fbc;
+ } else if (IS_I965GM(dev) || IS_I945GM(dev) || IS_I915GM(dev)) {
+ dev_priv->display.fbc_enabled = i8xx_fbc_enabled;
+ dev_priv->display.enable_fbc = i8xx_enable_fbc;
+ dev_priv->display.disable_fbc = i8xx_disable_fbc;
+ }
+ /* 855GM needs testing */
+ }
+
+ /* Returns the core display clock speed */
+ if (IS_I945G(dev))
+ dev_priv->display.get_display_clock_speed =
+ i945_get_display_clock_speed;
+ else if (IS_I915G(dev))
+ dev_priv->display.get_display_clock_speed =
+ i915_get_display_clock_speed;
+ else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev))
+ dev_priv->display.get_display_clock_speed =
+ i9xx_misc_get_display_clock_speed;
+ else if (IS_I915GM(dev))
+ dev_priv->display.get_display_clock_speed =
+ i915gm_get_display_clock_speed;
+ else if (IS_I865G(dev))
+ dev_priv->display.get_display_clock_speed =
+ i865_get_display_clock_speed;
+ else if (IS_I855(dev))
+ dev_priv->display.get_display_clock_speed =
+ i855_get_display_clock_speed;
+ else /* 852, 830 */
+ dev_priv->display.get_display_clock_speed =
+ i830_get_display_clock_speed;
+
+ /* For FIFO watermark updates; the FIFO decode is then picked per chip */
+ if (IS_G4X(dev))
+ dev_priv->display.update_wm = g4x_update_wm;
+ else if (IS_I965G(dev))
+ dev_priv->display.update_wm = i965_update_wm;
+ else if (IS_I9XX(dev) || IS_MOBILE(dev))
+ dev_priv->display.update_wm = i9xx_update_wm;
+ else
+ dev_priv->display.update_wm = i830_update_wm;
+ if (IS_I9XX(dev)) /* a chip can take i9xx_update_wm via IS_MOBILE without being IS_I9XX */
+ dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
+ else if (IS_I85X(dev))
+ dev_priv->display.get_fifo_size = i85x_get_fifo_size;
+ else if (IS_845G(dev))
+ dev_priv->display.get_fifo_size = i845_get_fifo_size;
+ else
+ dev_priv->display.get_fifo_size = i830_get_fifo_size;
+}
+
void intel_modeset_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3839,6 +4236,8 @@ void intel_modeset_init(struct drm_device *dev)
dev->mode_config.funcs = (void *)&intel_mode_funcs;
+ intel_init_display(dev);
+
if (IS_I965G(dev)) {
dev->mode_config.max_width = 8192;
dev->mode_config.max_height = 8192;
@@ -3904,6 +4303,9 @@ void intel_modeset_cleanup(struct drm_device *dev)
mutex_unlock(&dev->struct_mutex);
+ if (dev_priv->display.disable_fbc)
+ dev_priv->display.disable_fbc(dev);
+
drm_mode_config_cleanup(dev);
}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 3ebbbabfe59..8aa4b7f30da 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -28,6 +28,7 @@
#include <linux/i2c.h>
#include <linux/i2c-id.h>
#include <linux/i2c-algo-bit.h>
+#include "i915_drv.h"
#include "drm_crtc.h"
#include "drm_crtc_helper.h"
@@ -111,8 +112,8 @@ struct intel_output {
struct intel_crtc {
struct drm_crtc base;
- int pipe;
- int plane;
+ enum pipe pipe;
+ enum plane plane;
struct drm_gem_object *cursor_bo;
uint32_t cursor_addr;
u8 lut_r[256], lut_g[256], lut_b[256];
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index dafc0da1c25..98ae3d73577 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -27,6 +27,7 @@
* Jesse Barnes <jesse.barnes@intel.com>
*/
+#include <acpi/button.h>
#include <linux/dmi.h>
#include <linux/i2c.h>
#include "drmP.h"
@@ -295,6 +296,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder,
goto out;
}
+ /* full screen scale for now */
+ if (IS_IGDNG(dev))
+ goto out;
+
/* 965+ wants fuzzy fitting */
if (IS_I965G(dev))
pfit_control |= (intel_crtc->pipe << PFIT_PIPE_SHIFT) |
@@ -322,8 +327,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder,
* to register description and PRM.
* Change the value here to see the borders for debugging
*/
- I915_WRITE(BCLRPAT_A, 0);
- I915_WRITE(BCLRPAT_B, 0);
+ if (!IS_IGDNG(dev)) {
+ I915_WRITE(BCLRPAT_A, 0);
+ I915_WRITE(BCLRPAT_B, 0);
+ }
switch (lvds_priv->fitting_mode) {
case DRM_MODE_SCALE_CENTER:
@@ -572,7 +579,6 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder,
* settings.
*/
- /* No panel fitting yet, fixme */
if (IS_IGDNG(dev))
return;
@@ -585,15 +591,33 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder,
I915_WRITE(PFIT_CONTROL, lvds_priv->pfit_control);
}
+/* Some lid devices report incorrect lid status, assume they're connected: intel_lvds_detect() skips the lid check on a match */
+static const struct dmi_system_id bad_lid_status[] = {
+ {
+ .ident = "Aspire One",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire one"), /* note the lower-case "one", unlike .ident */
+ },
+ },
+ { } /* empty entry, presumably the terminator dmi_check_system() expects - confirm */
+};
+
/**
* Detect the LVDS connection.
*
- * This always returns CONNECTOR_STATUS_CONNECTED. This connector should only have
- * been set up if the LVDS was actually connected anyway.
+ * Since LVDS doesn't have hotplug, we use the lid as a proxy. Open means
+ * connected and closed means disconnected. We also send hotplug events as
+ * needed, using lid status notification from the input layer.
*/
static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector)
{
- return connector_status_connected;
+ enum drm_connector_status status = connector_status_connected;
+
+ if (!acpi_lid_open() && !dmi_check_system(bad_lid_status))
+ status = connector_status_disconnected;
+
+ return status;
}
/**
@@ -632,6 +656,24 @@ static int intel_lvds_get_modes(struct drm_connector *connector)
return 0;
}
+static int intel_lid_notify(struct notifier_block *nb, unsigned long val,
+ void *unused)
+{
+ struct drm_i915_private *dev_priv =
+ container_of(nb, struct drm_i915_private, lid_notifier);
+ struct drm_device *dev = dev_priv->dev;
+ /* Lid notifier callback: when the lid reads open and we are not suspended, force the mode back on */
+ if (acpi_lid_open() && !dev_priv->suspended) {
+ mutex_lock(&dev->mode_config.mutex);
+ drm_helper_resume_force_mode(dev);
+ mutex_unlock(&dev->mode_config.mutex);
+ }
+
+ drm_sysfs_hotplug_event(dev_priv->dev); /* sent on every notification, open or closed; @val is not consulted */
+
+ return NOTIFY_OK;
+}
+
/**
* intel_lvds_destroy - unregister and free LVDS structures
* @connector: connector to free
@@ -641,10 +683,14 @@ static int intel_lvds_get_modes(struct drm_connector *connector)
*/
static void intel_lvds_destroy(struct drm_connector *connector)
{
+ struct drm_device *dev = connector->dev;
struct intel_output *intel_output = to_intel_output(connector);
+ struct drm_i915_private *dev_priv = dev->dev_private;
if (intel_output->ddc_bus)
intel_i2c_destroy(intel_output->ddc_bus);
+ if (dev_priv->lid_notifier.notifier_call)
+ acpi_lid_notifier_unregister(&dev_priv->lid_notifier);
drm_sysfs_connector_remove(connector);
drm_connector_cleanup(connector);
kfree(connector);
@@ -1011,6 +1057,11 @@ out:
pwm |= PWM_PCH_ENABLE;
I915_WRITE(BLC_PWM_PCH_CTL1, pwm);
}
+ dev_priv->lid_notifier.notifier_call = intel_lid_notify;
+ if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) {
+ DRM_DEBUG("lid notifier registration failed\n");
+ dev_priv->lid_notifier.notifier_call = NULL;
+ }
drm_sysfs_connector_add(connector);
return;
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 0bf28efcf2c..083bec2e50f 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -135,6 +135,30 @@ struct intel_sdvo_priv {
struct intel_sdvo_dtd save_input_dtd_1, save_input_dtd_2;
struct intel_sdvo_dtd save_output_dtd[16];
u32 save_SDVOX;
+ /* add the property for the SDVO-TV */
+ struct drm_property *left_property;
+ struct drm_property *right_property;
+ struct drm_property *top_property;
+ struct drm_property *bottom_property;
+ struct drm_property *hpos_property;
+ struct drm_property *vpos_property;
+
+ /* add the property for the SDVO-TV/LVDS */
+ struct drm_property *brightness_property;
+ struct drm_property *contrast_property;
+ struct drm_property *saturation_property;
+ struct drm_property *hue_property;
+
+ /* Variables recording the current setting of each property above */
+ u32 left_margin, right_margin, top_margin, bottom_margin;
+ /* Maximum overscan values; these give the range of the margins. */
+ u32 max_hscan, max_vscan;
+ u32 max_hpos, cur_hpos;
+ u32 max_vpos, cur_vpos;
+ u32 cur_brightness, max_brightness;
+ u32 cur_contrast, max_contrast;
+ u32 cur_saturation, max_saturation;
+ u32 cur_hue, max_hue;
};
static bool
@@ -281,6 +305,31 @@ static const struct _sdvo_cmd_name {
SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT),
SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT),
SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS),
+ /* Add the op code for SDVO enhancements */
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_V),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_V),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_V),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V),
/* HDMI op code */
SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE),
SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE),
@@ -981,7 +1030,7 @@ static void intel_sdvo_set_tv_format(struct intel_output *output)
status = intel_sdvo_read_response(output, NULL, 0);
if (status != SDVO_CMD_STATUS_SUCCESS)
- DRM_DEBUG("%s: Failed to set TV format\n",
+ DRM_DEBUG_KMS("%s: Failed to set TV format\n",
SDVO_NAME(sdvo_priv));
}
@@ -1792,6 +1841,45 @@ static int intel_sdvo_get_modes(struct drm_connector *connector)
return 1;
}
+static
+void intel_sdvo_destroy_enhance_property(struct drm_connector *connector)
+{
+ struct intel_output *intel_output = to_intel_output(connector);
+ struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+ struct drm_device *dev = connector->dev;
+
+ if (sdvo_priv->is_tv) {
+ if (sdvo_priv->left_property)
+ drm_property_destroy(dev, sdvo_priv->left_property);
+ if (sdvo_priv->right_property)
+ drm_property_destroy(dev, sdvo_priv->right_property);
+ if (sdvo_priv->top_property)
+ drm_property_destroy(dev, sdvo_priv->top_property);
+ if (sdvo_priv->bottom_property)
+ drm_property_destroy(dev, sdvo_priv->bottom_property);
+ if (sdvo_priv->hpos_property)
+ drm_property_destroy(dev, sdvo_priv->hpos_property);
+ if (sdvo_priv->vpos_property)
+ drm_property_destroy(dev, sdvo_priv->vpos_property);
+ }
+ if (sdvo_priv->is_tv) {
+ if (sdvo_priv->saturation_property)
+ drm_property_destroy(dev,
+ sdvo_priv->saturation_property);
+ if (sdvo_priv->contrast_property)
+ drm_property_destroy(dev,
+ sdvo_priv->contrast_property);
+ if (sdvo_priv->hue_property)
+ drm_property_destroy(dev, sdvo_priv->hue_property);
+ }
+ if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+ if (sdvo_priv->brightness_property)
+ drm_property_destroy(dev,
+ sdvo_priv->brightness_property);
+ }
+ return;
+}
+
static void intel_sdvo_destroy(struct drm_connector *connector)
{
struct intel_output *intel_output = to_intel_output(connector);
@@ -1812,6 +1900,9 @@ static void intel_sdvo_destroy(struct drm_connector *connector)
drm_property_destroy(connector->dev,
sdvo_priv->tv_format_property);
+ if (sdvo_priv->is_tv || sdvo_priv->is_lvds)
+ intel_sdvo_destroy_enhance_property(connector);
+
drm_sysfs_connector_remove(connector);
drm_connector_cleanup(connector);
@@ -1829,6 +1920,8 @@ intel_sdvo_set_property(struct drm_connector *connector,
struct drm_crtc *crtc = encoder->crtc;
int ret = 0;
bool changed = false;
+ uint8_t cmd, status;
+ uint16_t temp_value;
ret = drm_connector_property_set_value(connector, property, val);
if (ret < 0)
@@ -1845,11 +1938,102 @@ intel_sdvo_set_property(struct drm_connector *connector,
sdvo_priv->tv_format_name = sdvo_priv->tv_format_supported[val];
changed = true;
- } else {
- ret = -EINVAL;
- goto out;
}
+ if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+ cmd = 0;
+ temp_value = val;
+ if (sdvo_priv->left_property == property) {
+ drm_connector_property_set_value(connector,
+ sdvo_priv->right_property, val);
+ if (sdvo_priv->left_margin == temp_value)
+ goto out;
+
+ sdvo_priv->left_margin = temp_value;
+ sdvo_priv->right_margin = temp_value;
+ temp_value = sdvo_priv->max_hscan -
+ sdvo_priv->left_margin;
+ cmd = SDVO_CMD_SET_OVERSCAN_H;
+ } else if (sdvo_priv->right_property == property) {
+ drm_connector_property_set_value(connector,
+ sdvo_priv->left_property, val);
+ if (sdvo_priv->right_margin == temp_value)
+ goto out;
+
+ sdvo_priv->left_margin = temp_value;
+ sdvo_priv->right_margin = temp_value;
+ temp_value = sdvo_priv->max_hscan -
+ sdvo_priv->left_margin;
+ cmd = SDVO_CMD_SET_OVERSCAN_H;
+ } else if (sdvo_priv->top_property == property) {
+ drm_connector_property_set_value(connector,
+ sdvo_priv->bottom_property, val);
+ if (sdvo_priv->top_margin == temp_value)
+ goto out;
+
+ sdvo_priv->top_margin = temp_value;
+ sdvo_priv->bottom_margin = temp_value;
+ temp_value = sdvo_priv->max_vscan -
+ sdvo_priv->top_margin;
+ cmd = SDVO_CMD_SET_OVERSCAN_V;
+ } else if (sdvo_priv->bottom_property == property) {
+ drm_connector_property_set_value(connector,
+ sdvo_priv->top_property, val);
+ if (sdvo_priv->bottom_margin == temp_value)
+ goto out;
+ sdvo_priv->top_margin = temp_value;
+ sdvo_priv->bottom_margin = temp_value;
+ temp_value = sdvo_priv->max_vscan -
+ sdvo_priv->top_margin;
+ cmd = SDVO_CMD_SET_OVERSCAN_V;
+ } else if (sdvo_priv->hpos_property == property) {
+ if (sdvo_priv->cur_hpos == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_POSITION_H;
+ sdvo_priv->cur_hpos = temp_value;
+ } else if (sdvo_priv->vpos_property == property) {
+ if (sdvo_priv->cur_vpos == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_POSITION_V;
+ sdvo_priv->cur_vpos = temp_value;
+ } else if (sdvo_priv->saturation_property == property) {
+ if (sdvo_priv->cur_saturation == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_SATURATION;
+ sdvo_priv->cur_saturation = temp_value;
+ } else if (sdvo_priv->contrast_property == property) {
+ if (sdvo_priv->cur_contrast == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_CONTRAST;
+ sdvo_priv->cur_contrast = temp_value;
+ } else if (sdvo_priv->hue_property == property) {
+ if (sdvo_priv->cur_hue == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_HUE;
+ sdvo_priv->cur_hue = temp_value;
+ } else if (sdvo_priv->brightness_property == property) {
+ if (sdvo_priv->cur_brightness == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_BRIGHTNESS;
+ sdvo_priv->cur_brightness = temp_value;
+ }
+ if (cmd) {
+ intel_sdvo_write_cmd(intel_output, cmd, &temp_value, 2);
+ status = intel_sdvo_read_response(intel_output,
+ NULL, 0);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO command \n");
+ return -EINVAL;
+ }
+ changed = true;
+ }
+ }
if (changed && crtc)
drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x,
crtc->y, crtc->fb);
@@ -2090,6 +2274,8 @@ intel_sdvo_output_setup(struct intel_output *intel_output, uint16_t flags)
sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1;
encoder->encoder_type = DRM_MODE_ENCODER_DAC;
connector->connector_type = DRM_MODE_CONNECTOR_VGA;
+ intel_output->clone_mask = (1 << INTEL_SDVO_NON_TV_CLONE_BIT) |
+ (1 << INTEL_ANALOG_CLONE_BIT);
} else if (flags & SDVO_OUTPUT_LVDS0) {
sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0;
@@ -2176,6 +2362,310 @@ static void intel_sdvo_tv_create_property(struct drm_connector *connector)
}
+static void intel_sdvo_create_enhance_property(struct drm_connector *connector)
+{
+ struct intel_output *intel_output = to_intel_output(connector);
+ struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+ struct intel_sdvo_enhancements_reply sdvo_data;
+ struct drm_device *dev = connector->dev;
+ uint8_t status;
+ uint16_t response, data_value[2];
+
+ intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS,
+ NULL, 0);
+ status = intel_sdvo_read_response(intel_output, &sdvo_data,
+ sizeof(sdvo_data));
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS(" incorrect response is returned\n");
+ return;
+ }
+ response = *((uint16_t *)&sdvo_data);
+ if (!response) {
+ DRM_DEBUG_KMS("No enhancement is supported\n");
+ return;
+ }
+ if (sdvo_priv->is_tv) {
+ /* When horizontal overscan is supported, add the left/right
+ * margin properties.
+ */
+ if (sdvo_data.overscan_h) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_OVERSCAN_H, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO max "
+ "h_overscan\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_OVERSCAN_H, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO h_overscan\n");
+ return;
+ }
+ sdvo_priv->max_hscan = data_value[0];
+ sdvo_priv->left_margin = data_value[0] - response;
+ sdvo_priv->right_margin = sdvo_priv->left_margin;
+ sdvo_priv->left_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "left_margin", 2);
+ sdvo_priv->left_property->values[0] = 0;
+ sdvo_priv->left_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->left_property,
+ sdvo_priv->left_margin);
+ sdvo_priv->right_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "right_margin", 2);
+ sdvo_priv->right_property->values[0] = 0;
+ sdvo_priv->right_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->right_property,
+ sdvo_priv->right_margin);
+ DRM_DEBUG_KMS("h_overscan: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ if (sdvo_data.overscan_v) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_OVERSCAN_V, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO max "
+ "v_overscan\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_OVERSCAN_V, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO v_overscan\n");
+ return;
+ }
+ sdvo_priv->max_vscan = data_value[0];
+ sdvo_priv->top_margin = data_value[0] - response;
+ sdvo_priv->bottom_margin = sdvo_priv->top_margin;
+ sdvo_priv->top_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "top_margin", 2);
+ sdvo_priv->top_property->values[0] = 0;
+ sdvo_priv->top_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->top_property,
+ sdvo_priv->top_margin);
+ sdvo_priv->bottom_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "bottom_margin", 2);
+ sdvo_priv->bottom_property->values[0] = 0;
+ sdvo_priv->bottom_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->bottom_property,
+ sdvo_priv->bottom_margin);
+ DRM_DEBUG_KMS("v_overscan: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ if (sdvo_data.position_h) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_POSITION_H, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max h_pos\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_POSITION_H, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get h_postion\n");
+ return;
+ }
+ sdvo_priv->max_hpos = data_value[0];
+ sdvo_priv->cur_hpos = response;
+ sdvo_priv->hpos_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "hpos", 2);
+ sdvo_priv->hpos_property->values[0] = 0;
+ sdvo_priv->hpos_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->hpos_property,
+ sdvo_priv->cur_hpos);
+ DRM_DEBUG_KMS("h_position: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ if (sdvo_data.position_v) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_POSITION_V, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max v_pos\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_POSITION_V, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get v_postion\n");
+ return;
+ }
+ sdvo_priv->max_vpos = data_value[0];
+ sdvo_priv->cur_vpos = response;
+ sdvo_priv->vpos_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "vpos", 2);
+ sdvo_priv->vpos_property->values[0] = 0;
+ sdvo_priv->vpos_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->vpos_property,
+ sdvo_priv->cur_vpos);
+ DRM_DEBUG_KMS("v_position: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ }
+ if (sdvo_priv->is_tv) {
+ if (sdvo_data.saturation) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_SATURATION, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max sat\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_SATURATION, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get sat\n");
+ return;
+ }
+ sdvo_priv->max_saturation = data_value[0];
+ sdvo_priv->cur_saturation = response;
+ sdvo_priv->saturation_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "saturation", 2);
+ sdvo_priv->saturation_property->values[0] = 0;
+ sdvo_priv->saturation_property->values[1] =
+ data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->saturation_property,
+ sdvo_priv->cur_saturation);
+ DRM_DEBUG_KMS("saturation: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ if (sdvo_data.contrast) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_CONTRAST, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max contrast\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_CONTRAST, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get contrast\n");
+ return;
+ }
+ sdvo_priv->max_contrast = data_value[0];
+ sdvo_priv->cur_contrast = response;
+ sdvo_priv->contrast_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "contrast", 2);
+ sdvo_priv->contrast_property->values[0] = 0;
+ sdvo_priv->contrast_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->contrast_property,
+ sdvo_priv->cur_contrast);
+ DRM_DEBUG_KMS("contrast: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ if (sdvo_data.hue) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_HUE, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max hue\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_HUE, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get hue\n");
+ return;
+ }
+ sdvo_priv->max_hue = data_value[0];
+ sdvo_priv->cur_hue = response;
+ sdvo_priv->hue_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "hue", 2);
+ sdvo_priv->hue_property->values[0] = 0;
+ sdvo_priv->hue_property->values[1] =
+ data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->hue_property,
+ sdvo_priv->cur_hue);
+ DRM_DEBUG_KMS("hue: max %d, default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ }
+ if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+ if (sdvo_data.brightness) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_BRIGHTNESS, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max bright\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_BRIGHTNESS, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get brigh\n");
+ return;
+ }
+ sdvo_priv->max_brightness = data_value[0];
+ sdvo_priv->cur_brightness = response;
+ sdvo_priv->brightness_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "brightness", 2);
+ sdvo_priv->brightness_property->values[0] = 0;
+ sdvo_priv->brightness_property->values[1] =
+ data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->brightness_property,
+ sdvo_priv->cur_brightness);
+ DRM_DEBUG_KMS("brightness: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ }
+ return;
+}
+
bool intel_sdvo_init(struct drm_device *dev, int output_device)
{
struct drm_connector *connector;
@@ -2264,6 +2754,10 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc);
if (sdvo_priv->is_tv)
intel_sdvo_tv_create_property(connector);
+
+ if (sdvo_priv->is_tv || sdvo_priv->is_lvds)
+ intel_sdvo_create_enhance_property(connector);
+
drm_sysfs_connector_add(connector);
intel_sdvo_select_ddc_bus(sdvo_priv);
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index ed7711d11ae..6857560144b 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -325,34 +325,6 @@ config SENSORS_F75375S
This driver can also be built as a module. If so, the module
will be called f75375s.
-config SENSORS_FSCHER
- tristate "FSC Hermes (DEPRECATED)"
- depends on X86 && I2C
- help
- This driver is DEPRECATED please use the new merged fschmd
- ("FSC Poseidon, Scylla, Hermes, Heimdall and Heracles") driver
- instead.
-
- If you say yes here you get support for Fujitsu Siemens
- Computers Hermes sensor chips.
-
- This driver can also be built as a module. If so, the module
- will be called fscher.
-
-config SENSORS_FSCPOS
- tristate "FSC Poseidon (DEPRECATED)"
- depends on X86 && I2C
- help
- This driver is DEPRECATED please use the new merged fschmd
- ("FSC Poseidon, Scylla, Hermes, Heimdall and Heracles") driver
- instead.
-
- If you say yes here you get support for Fujitsu Siemens
- Computers Poseidon sensor chips.
-
- This driver can also be built as a module. If so, the module
- will be called fscpos.
-
config SENSORS_FSCHMD
tristate "Fujitsu Siemens Computers sensor chips"
depends on X86 && I2C
@@ -401,12 +373,12 @@ config SENSORS_GL520SM
will be called gl520sm.
config SENSORS_CORETEMP
- tristate "Intel Core (2) Duo/Solo temperature sensor"
+ tristate "Intel Core/Core2/Atom temperature sensor"
depends on X86 && EXPERIMENTAL
help
If you say yes here you get support for the temperature
- sensor inside your CPU. Supported all are all known variants
- of Intel Core family.
+ sensor inside your CPU. Most of the family 6 CPUs
+ are supported. Check documentation/driver for details.
config SENSORS_IBMAEM
tristate "IBM Active Energy Manager temperature/power sensors and control"
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index bcf73a9bb61..9f46cb019cc 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -42,9 +42,7 @@ obj-$(CONFIG_SENSORS_DS1621) += ds1621.o
obj-$(CONFIG_SENSORS_F71805F) += f71805f.o
obj-$(CONFIG_SENSORS_F71882FG) += f71882fg.o
obj-$(CONFIG_SENSORS_F75375S) += f75375s.o
-obj-$(CONFIG_SENSORS_FSCHER) += fscher.o
obj-$(CONFIG_SENSORS_FSCHMD) += fschmd.o
-obj-$(CONFIG_SENSORS_FSCPOS) += fscpos.o
obj-$(CONFIG_SENSORS_G760A) += g760a.o
obj-$(CONFIG_SENSORS_GL518SM) += gl518sm.o
obj-$(CONFIG_SENSORS_GL520SM) += gl520sm.o
diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c
index 789441830cd..56905955352 100644
--- a/drivers/hwmon/adm1031.c
+++ b/drivers/hwmon/adm1031.c
@@ -37,6 +37,7 @@
#define ADM1031_REG_PWM (0x22)
#define ADM1031_REG_FAN_MIN(nr) (0x10 + (nr))
+#define ADM1031_REG_TEMP_OFFSET(nr) (0x0d + (nr))
#define ADM1031_REG_TEMP_MAX(nr) (0x14 + 4 * (nr))
#define ADM1031_REG_TEMP_MIN(nr) (0x15 + 4 * (nr))
#define ADM1031_REG_TEMP_CRIT(nr) (0x16 + 4 * (nr))
@@ -93,6 +94,7 @@ struct adm1031_data {
u8 auto_temp_min[3];
u8 auto_temp_off[3];
u8 auto_temp_max[3];
+ s8 temp_offset[3];
s8 temp_min[3];
s8 temp_max[3];
s8 temp_crit[3];
@@ -145,6 +147,10 @@ adm1031_write_value(struct i2c_client *client, u8 reg, unsigned int value)
#define TEMP_FROM_REG_EXT(val, ext) (TEMP_FROM_REG(val) + (ext) * 125)
+#define TEMP_OFFSET_TO_REG(val) (TEMP_TO_REG(val) & 0x8f)
+#define TEMP_OFFSET_FROM_REG(val) TEMP_FROM_REG((val) < 0 ? \
+ (val) | 0x70 : (val))
+
#define FAN_FROM_REG(reg, div) ((reg) ? (11250 * 60) / ((reg) * (div)) : 0)
static int FAN_TO_REG(int reg, int div)
@@ -585,6 +591,14 @@ static ssize_t show_temp(struct device *dev,
(((data->ext_temp[nr] >> ((nr - 1) * 3)) & 7));
return sprintf(buf, "%d\n", TEMP_FROM_REG_EXT(data->temp[nr], ext));
}
+static ssize_t show_temp_offset(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int nr = to_sensor_dev_attr(attr)->index;
+ struct adm1031_data *data = adm1031_update_device(dev);
+ return sprintf(buf, "%d\n",
+ TEMP_OFFSET_FROM_REG(data->temp_offset[nr]));
+}
static ssize_t show_temp_min(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -606,6 +620,24 @@ static ssize_t show_temp_crit(struct device *dev,
struct adm1031_data *data = adm1031_update_device(dev);
return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_crit[nr]));
}
+static ssize_t set_temp_offset(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct adm1031_data *data = i2c_get_clientdata(client);
+ int nr = to_sensor_dev_attr(attr)->index;
+ int val;
+
+ val = simple_strtol(buf, NULL, 10);
+ val = SENSORS_LIMIT(val, -15000, 15000);
+ mutex_lock(&data->update_lock);
+ data->temp_offset[nr] = TEMP_OFFSET_TO_REG(val);
+ adm1031_write_value(client, ADM1031_REG_TEMP_OFFSET(nr),
+ data->temp_offset[nr]);
+ mutex_unlock(&data->update_lock);
+ return count;
+}
static ssize_t set_temp_min(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
@@ -661,6 +693,8 @@ static ssize_t set_temp_crit(struct device *dev, struct device_attribute *attr,
#define temp_reg(offset) \
static SENSOR_DEVICE_ATTR(temp##offset##_input, S_IRUGO, \
show_temp, NULL, offset - 1); \
+static SENSOR_DEVICE_ATTR(temp##offset##_offset, S_IRUGO | S_IWUSR, \
+ show_temp_offset, set_temp_offset, offset - 1); \
static SENSOR_DEVICE_ATTR(temp##offset##_min, S_IRUGO | S_IWUSR, \
show_temp_min, set_temp_min, offset - 1); \
static SENSOR_DEVICE_ATTR(temp##offset##_max, S_IRUGO | S_IWUSR, \
@@ -714,6 +748,7 @@ static struct attribute *adm1031_attributes[] = {
&sensor_dev_attr_pwm1.dev_attr.attr,
&sensor_dev_attr_auto_fan1_channel.dev_attr.attr,
&sensor_dev_attr_temp1_input.dev_attr.attr,
+ &sensor_dev_attr_temp1_offset.dev_attr.attr,
&sensor_dev_attr_temp1_min.dev_attr.attr,
&sensor_dev_attr_temp1_min_alarm.dev_attr.attr,
&sensor_dev_attr_temp1_max.dev_attr.attr,
@@ -721,6 +756,7 @@ static struct attribute *adm1031_attributes[] = {
&sensor_dev_attr_temp1_crit.dev_attr.attr,
&sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
&sensor_dev_attr_temp2_input.dev_attr.attr,
+ &sensor_dev_attr_temp2_offset.dev_attr.attr,
&sensor_dev_attr_temp2_min.dev_attr.attr,
&sensor_dev_attr_temp2_min_alarm.dev_attr.attr,
&sensor_dev_attr_temp2_max.dev_attr.attr,
@@ -757,6 +793,7 @@ static struct attribute *adm1031_attributes_opt[] = {
&sensor_dev_attr_pwm2.dev_attr.attr,
&sensor_dev_attr_auto_fan2_channel.dev_attr.attr,
&sensor_dev_attr_temp3_input.dev_attr.attr,
+ &sensor_dev_attr_temp3_offset.dev_attr.attr,
&sensor_dev_attr_temp3_min.dev_attr.attr,
&sensor_dev_attr_temp3_min_alarm.dev_attr.attr,
&sensor_dev_attr_temp3_max.dev_attr.attr,
@@ -937,6 +974,9 @@ static struct adm1031_data *adm1031_update_device(struct device *dev)
}
data->temp[chan] = newh;
+ data->temp_offset[chan] =
+ adm1031_read_value(client,
+ ADM1031_REG_TEMP_OFFSET(chan));
data->temp_min[chan] =
adm1031_read_value(client,
ADM1031_REG_TEMP_MIN(chan));
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 972cf4ba963..caef39cda8c 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -157,17 +157,26 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *
/* The 100C is default for both mobile and non mobile CPUs */
int tjmax = 100000;
- int ismobile = 1;
+ int tjmax_ee = 85000;
+ int usemsr_ee = 1;
int err;
u32 eax, edx;
/* Early chips have no MSR for TjMax */
if ((c->x86_model == 0xf) && (c->x86_mask < 4)) {
- ismobile = 0;
+ usemsr_ee = 0;
}
- if ((c->x86_model > 0xe) && (ismobile)) {
+ /* Atoms seem to have TjMax at 90C */
+
+ if (c->x86_model == 0x1c) {
+ usemsr_ee = 0;
+ tjmax = 90000;
+ }
+
+ if ((c->x86_model > 0xe) && (usemsr_ee)) {
+ u8 platform_id;
/* Now we can detect the mobile CPU using Intel provided table
http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
@@ -179,13 +188,29 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *
dev_warn(dev,
"Unable to access MSR 0x17, assuming desktop"
" CPU\n");
- ismobile = 0;
- } else if (!(eax & 0x10000000)) {
- ismobile = 0;
+ usemsr_ee = 0;
+ } else if (c->x86_model < 0x17 && !(eax & 0x10000000)) {
+ /* Trust bit 28 up to Penryn, I could not find any
+ documentation on that; if you happen to know
+ someone at Intel please ask */
+ usemsr_ee = 0;
+ } else {
+ /* Platform ID bits 52:50 (EDX starts at bit 32) */
+ platform_id = (edx >> 18) & 0x7;
+
+ /* Mobile Penryn CPU seems to be platform ID 7 or 5
+ (guesswork) */
+ if ((c->x86_model == 0x17) &&
+ ((platform_id == 5) || (platform_id == 7))) {
+ /* If MSR EE bit is set, set it to 90 degrees C,
+ otherwise 105 degrees C */
+ tjmax_ee = 90000;
+ tjmax = 105000;
+ }
}
}
- if (ismobile || c->x86_model == 0x1c) {
+ if (usemsr_ee) {
err = rdmsr_safe_on_cpu(id, 0xee, &eax, &edx);
if (err) {
@@ -193,9 +218,11 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *
"Unable to access MSR 0xEE, for Tjmax, left"
" at default");
} else if (eax & 0x40000000) {
- tjmax = 85000;
+ tjmax = tjmax_ee;
}
- } else {
+ /* if we don't use MSR EE it means we are a desktop CPU (with the exception
+ of Atom) */
+ } else if (tjmax == 100000) {
dev_warn(dev, "Using relative temperature scale!\n");
}
@@ -248,9 +275,9 @@ static int __devinit coretemp_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, data);
/* read the still undocumented IA32_TEMPERATURE_TARGET it exists
- on older CPUs but not in this register */
+ on older CPUs but not in this register, Atoms don't have it either */
- if (c->x86_model > 0xe) {
+ if ((c->x86_model > 0xe) && (c->x86_model != 0x1c)) {
err = rdmsr_safe_on_cpu(data->id, 0x1a2, &eax, &edx);
if (err) {
dev_warn(&pdev->dev, "Unable to read"
@@ -413,11 +440,15 @@ static int __init coretemp_init(void)
for_each_online_cpu(i) {
struct cpuinfo_x86 *c = &cpu_data(i);
- /* check if family 6, models 0xe, 0xf, 0x16, 0x17, 0x1A */
+ /* check if family 6, models 0xe (Pentium M DC),
+ 0xf (Core 2 DC 65nm), 0x16 (Core 2 SC 65nm),
+ 0x17 (Penryn 45nm), 0x1a (Nehalem), 0x1c (Atom),
+ 0x1e (Lynnfield) */
if ((c->cpuid_level < 0) || (c->x86 != 0x6) ||
!((c->x86_model == 0xe) || (c->x86_model == 0xf) ||
(c->x86_model == 0x16) || (c->x86_model == 0x17) ||
- (c->x86_model == 0x1A) || (c->x86_model == 0x1c))) {
+ (c->x86_model == 0x1a) || (c->x86_model == 0x1c) ||
+ (c->x86_model == 0x1e))) {
/* supported CPU not found, but report the unknown
family 6 CPU */
diff --git a/drivers/hwmon/fscher.c b/drivers/hwmon/fscher.c
deleted file mode 100644
index 12c70e402cb..00000000000
--- a/drivers/hwmon/fscher.c
+++ /dev/null
@@ -1,680 +0,0 @@
-/*
- * fscher.c - Part of lm_sensors, Linux kernel modules for hardware
- * monitoring
- * Copyright (C) 2003, 2004 Reinhard Nissl <rnissl@gmx.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- * fujitsu siemens hermes chip,
- * module based on fscpos.c
- * Copyright (C) 2000 Hermann Jung <hej@odn.de>
- * Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
- * and Philip Edelbrock <phil@netroedge.com>
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/i2c.h>
-#include <linux/hwmon.h>
-#include <linux/err.h>
-#include <linux/mutex.h>
-#include <linux/sysfs.h>
-
-/*
- * Addresses to scan
- */
-
-static const unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END };
-
-/*
- * Insmod parameters
- */
-
-I2C_CLIENT_INSMOD_1(fscher);
-
-/*
- * The FSCHER registers
- */
-
-/* chip identification */
-#define FSCHER_REG_IDENT_0 0x00
-#define FSCHER_REG_IDENT_1 0x01
-#define FSCHER_REG_IDENT_2 0x02
-#define FSCHER_REG_REVISION 0x03
-
-/* global control and status */
-#define FSCHER_REG_EVENT_STATE 0x04
-#define FSCHER_REG_CONTROL 0x05
-
-/* watchdog */
-#define FSCHER_REG_WDOG_PRESET 0x28
-#define FSCHER_REG_WDOG_STATE 0x23
-#define FSCHER_REG_WDOG_CONTROL 0x21
-
-/* fan 0 */
-#define FSCHER_REG_FAN0_MIN 0x55
-#define FSCHER_REG_FAN0_ACT 0x0e
-#define FSCHER_REG_FAN0_STATE 0x0d
-#define FSCHER_REG_FAN0_RIPPLE 0x0f
-
-/* fan 1 */
-#define FSCHER_REG_FAN1_MIN 0x65
-#define FSCHER_REG_FAN1_ACT 0x6b
-#define FSCHER_REG_FAN1_STATE 0x62
-#define FSCHER_REG_FAN1_RIPPLE 0x6f
-
-/* fan 2 */
-#define FSCHER_REG_FAN2_MIN 0xb5
-#define FSCHER_REG_FAN2_ACT 0xbb
-#define FSCHER_REG_FAN2_STATE 0xb2
-#define FSCHER_REG_FAN2_RIPPLE 0xbf
-
-/* voltage supervision */
-#define FSCHER_REG_VOLT_12 0x45
-#define FSCHER_REG_VOLT_5 0x42
-#define FSCHER_REG_VOLT_BATT 0x48
-
-/* temperature 0 */
-#define FSCHER_REG_TEMP0_ACT 0x64
-#define FSCHER_REG_TEMP0_STATE 0x71
-
-/* temperature 1 */
-#define FSCHER_REG_TEMP1_ACT 0x32
-#define FSCHER_REG_TEMP1_STATE 0x81
-
-/* temperature 2 */
-#define FSCHER_REG_TEMP2_ACT 0x35
-#define FSCHER_REG_TEMP2_STATE 0x91
-
-/*
- * Functions declaration
- */
-
-static int fscher_probe(struct i2c_client *client,
- const struct i2c_device_id *id);
-static int fscher_detect(struct i2c_client *client, int kind,
- struct i2c_board_info *info);
-static int fscher_remove(struct i2c_client *client);
-static struct fscher_data *fscher_update_device(struct device *dev);
-static void fscher_init_client(struct i2c_client *client);
-
-static int fscher_read_value(struct i2c_client *client, u8 reg);
-static int fscher_write_value(struct i2c_client *client, u8 reg, u8 value);
-
-/*
- * Driver data (common to all clients)
- */
-
-static const struct i2c_device_id fscher_id[] = {
- { "fscher", fscher },
- { }
-};
-
-static struct i2c_driver fscher_driver = {
- .class = I2C_CLASS_HWMON,
- .driver = {
- .name = "fscher",
- },
- .probe = fscher_probe,
- .remove = fscher_remove,
- .id_table = fscher_id,
- .detect = fscher_detect,
- .address_data = &addr_data,
-};
-
-/*
- * Client data (each client gets its own)
- */
-
-struct fscher_data {
- struct device *hwmon_dev;
- struct mutex update_lock;
- char valid; /* zero until following fields are valid */
- unsigned long last_updated; /* in jiffies */
-
- /* register values */
- u8 revision; /* revision of chip */
- u8 global_event; /* global event status */
- u8 global_control; /* global control register */
- u8 watchdog[3]; /* watchdog */
- u8 volt[3]; /* 12, 5, battery voltage */
- u8 temp_act[3]; /* temperature */
- u8 temp_status[3]; /* status of sensor */
- u8 fan_act[3]; /* fans revolutions per second */
- u8 fan_status[3]; /* fan status */
- u8 fan_min[3]; /* fan min value for rps */
- u8 fan_ripple[3]; /* divider for rps */
-};
-
-/*
- * Sysfs stuff
- */
-
-#define sysfs_r(kind, sub, offset, reg) \
-static ssize_t show_##kind##sub (struct fscher_data *, char *, int); \
-static ssize_t show_##kind##offset##sub (struct device *, struct device_attribute *attr, char *); \
-static ssize_t show_##kind##offset##sub (struct device *dev, struct device_attribute *attr, char *buf) \
-{ \
- struct fscher_data *data = fscher_update_device(dev); \
- return show_##kind##sub(data, buf, (offset)); \
-}
-
-#define sysfs_w(kind, sub, offset, reg) \
-static ssize_t set_##kind##sub (struct i2c_client *, struct fscher_data *, const char *, size_t, int, int); \
-static ssize_t set_##kind##offset##sub (struct device *, struct device_attribute *attr, const char *, size_t); \
-static ssize_t set_##kind##offset##sub (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) \
-{ \
- struct i2c_client *client = to_i2c_client(dev); \
- struct fscher_data *data = i2c_get_clientdata(client); \
- return set_##kind##sub(client, data, buf, count, (offset), reg); \
-}
-
-#define sysfs_rw_n(kind, sub, offset, reg) \
-sysfs_r(kind, sub, offset, reg) \
-sysfs_w(kind, sub, offset, reg) \
-static DEVICE_ATTR(kind##offset##sub, S_IRUGO | S_IWUSR, show_##kind##offset##sub, set_##kind##offset##sub);
-
-#define sysfs_rw(kind, sub, reg) \
-sysfs_r(kind, sub, 0, reg) \
-sysfs_w(kind, sub, 0, reg) \
-static DEVICE_ATTR(kind##sub, S_IRUGO | S_IWUSR, show_##kind##0##sub, set_##kind##0##sub);
-
-#define sysfs_ro_n(kind, sub, offset, reg) \
-sysfs_r(kind, sub, offset, reg) \
-static DEVICE_ATTR(kind##offset##sub, S_IRUGO, show_##kind##offset##sub, NULL);
-
-#define sysfs_ro(kind, sub, reg) \
-sysfs_r(kind, sub, 0, reg) \
-static DEVICE_ATTR(kind, S_IRUGO, show_##kind##0##sub, NULL);
-
-#define sysfs_fan(offset, reg_status, reg_min, reg_ripple, reg_act) \
-sysfs_rw_n(pwm, , offset, reg_min) \
-sysfs_rw_n(fan, _status, offset, reg_status) \
-sysfs_rw_n(fan, _div , offset, reg_ripple) \
-sysfs_ro_n(fan, _input , offset, reg_act)
-
-#define sysfs_temp(offset, reg_status, reg_act) \
-sysfs_rw_n(temp, _status, offset, reg_status) \
-sysfs_ro_n(temp, _input , offset, reg_act)
-
-#define sysfs_in(offset, reg_act) \
-sysfs_ro_n(in, _input, offset, reg_act)
-
-#define sysfs_revision(reg_revision) \
-sysfs_ro(revision, , reg_revision)
-
-#define sysfs_alarms(reg_events) \
-sysfs_ro(alarms, , reg_events)
-
-#define sysfs_control(reg_control) \
-sysfs_rw(control, , reg_control)
-
-#define sysfs_watchdog(reg_control, reg_status, reg_preset) \
-sysfs_rw(watchdog, _control, reg_control) \
-sysfs_rw(watchdog, _status , reg_status) \
-sysfs_rw(watchdog, _preset , reg_preset)
-
-sysfs_fan(1, FSCHER_REG_FAN0_STATE, FSCHER_REG_FAN0_MIN,
- FSCHER_REG_FAN0_RIPPLE, FSCHER_REG_FAN0_ACT)
-sysfs_fan(2, FSCHER_REG_FAN1_STATE, FSCHER_REG_FAN1_MIN,
- FSCHER_REG_FAN1_RIPPLE, FSCHER_REG_FAN1_ACT)
-sysfs_fan(3, FSCHER_REG_FAN2_STATE, FSCHER_REG_FAN2_MIN,
- FSCHER_REG_FAN2_RIPPLE, FSCHER_REG_FAN2_ACT)
-
-sysfs_temp(1, FSCHER_REG_TEMP0_STATE, FSCHER_REG_TEMP0_ACT)
-sysfs_temp(2, FSCHER_REG_TEMP1_STATE, FSCHER_REG_TEMP1_ACT)
-sysfs_temp(3, FSCHER_REG_TEMP2_STATE, FSCHER_REG_TEMP2_ACT)
-
-sysfs_in(0, FSCHER_REG_VOLT_12)
-sysfs_in(1, FSCHER_REG_VOLT_5)
-sysfs_in(2, FSCHER_REG_VOLT_BATT)
-
-sysfs_revision(FSCHER_REG_REVISION)
-sysfs_alarms(FSCHER_REG_EVENTS)
-sysfs_control(FSCHER_REG_CONTROL)
-sysfs_watchdog(FSCHER_REG_WDOG_CONTROL, FSCHER_REG_WDOG_STATE, FSCHER_REG_WDOG_PRESET)
-
-static struct attribute *fscher_attributes[] = {
- &dev_attr_revision.attr,
- &dev_attr_alarms.attr,
- &dev_attr_control.attr,
-
- &dev_attr_watchdog_status.attr,
- &dev_attr_watchdog_control.attr,
- &dev_attr_watchdog_preset.attr,
-
- &dev_attr_in0_input.attr,
- &dev_attr_in1_input.attr,
- &dev_attr_in2_input.attr,
-
- &dev_attr_fan1_status.attr,
- &dev_attr_fan1_div.attr,
- &dev_attr_fan1_input.attr,
- &dev_attr_pwm1.attr,
- &dev_attr_fan2_status.attr,
- &dev_attr_fan2_div.attr,
- &dev_attr_fan2_input.attr,
- &dev_attr_pwm2.attr,
- &dev_attr_fan3_status.attr,
- &dev_attr_fan3_div.attr,
- &dev_attr_fan3_input.attr,
- &dev_attr_pwm3.attr,
-
- &dev_attr_temp1_status.attr,
- &dev_attr_temp1_input.attr,
- &dev_attr_temp2_status.attr,
- &dev_attr_temp2_input.attr,
- &dev_attr_temp3_status.attr,
- &dev_attr_temp3_input.attr,
- NULL
-};
-
-static const struct attribute_group fscher_group = {
- .attrs = fscher_attributes,
-};
-
-/*
- * Real code
- */
-
-/* Return 0 if detection is successful, -ENODEV otherwise */
-static int fscher_detect(struct i2c_client *new_client, int kind,
- struct i2c_board_info *info)
-{
- struct i2c_adapter *adapter = new_client->adapter;
-
- if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
- return -ENODEV;
-
- /* Do the remaining detection unless force or force_fscher parameter */
- if (kind < 0) {
- if ((i2c_smbus_read_byte_data(new_client,
- FSCHER_REG_IDENT_0) != 0x48) /* 'H' */
- || (i2c_smbus_read_byte_data(new_client,
- FSCHER_REG_IDENT_1) != 0x45) /* 'E' */
- || (i2c_smbus_read_byte_data(new_client,
- FSCHER_REG_IDENT_2) != 0x52)) /* 'R' */
- return -ENODEV;
- }
-
- strlcpy(info->type, "fscher", I2C_NAME_SIZE);
-
- return 0;
-}
-
-static int fscher_probe(struct i2c_client *new_client,
- const struct i2c_device_id *id)
-{
- struct fscher_data *data;
- int err;
-
- data = kzalloc(sizeof(struct fscher_data), GFP_KERNEL);
- if (!data) {
- err = -ENOMEM;
- goto exit;
- }
-
- i2c_set_clientdata(new_client, data);
- data->valid = 0;
- mutex_init(&data->update_lock);
-
- fscher_init_client(new_client);
-
- /* Register sysfs hooks */
- if ((err = sysfs_create_group(&new_client->dev.kobj, &fscher_group)))
- goto exit_free;
-
- data->hwmon_dev = hwmon_device_register(&new_client->dev);
- if (IS_ERR(data->hwmon_dev)) {
- err = PTR_ERR(data->hwmon_dev);
- goto exit_remove_files;
- }
-
- return 0;
-
-exit_remove_files:
- sysfs_remove_group(&new_client->dev.kobj, &fscher_group);
-exit_free:
- kfree(data);
-exit:
- return err;
-}
-
-static int fscher_remove(struct i2c_client *client)
-{
- struct fscher_data *data = i2c_get_clientdata(client);
-
- hwmon_device_unregister(data->hwmon_dev);
- sysfs_remove_group(&client->dev.kobj, &fscher_group);
-
- kfree(data);
- return 0;
-}
-
-static int fscher_read_value(struct i2c_client *client, u8 reg)
-{
- dev_dbg(&client->dev, "read reg 0x%02x\n", reg);
-
- return i2c_smbus_read_byte_data(client, reg);
-}
-
-static int fscher_write_value(struct i2c_client *client, u8 reg, u8 value)
-{
- dev_dbg(&client->dev, "write reg 0x%02x, val 0x%02x\n",
- reg, value);
-
- return i2c_smbus_write_byte_data(client, reg, value);
-}
-
-/* Called when we have found a new FSC Hermes. */
-static void fscher_init_client(struct i2c_client *client)
-{
- struct fscher_data *data = i2c_get_clientdata(client);
-
- /* Read revision from chip */
- data->revision = fscher_read_value(client, FSCHER_REG_REVISION);
-}
-
-static struct fscher_data *fscher_update_device(struct device *dev)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct fscher_data *data = i2c_get_clientdata(client);
-
- mutex_lock(&data->update_lock);
-
- if (time_after(jiffies, data->last_updated + 2 * HZ) || !data->valid) {
-
- dev_dbg(&client->dev, "Starting fscher update\n");
-
- data->temp_act[0] = fscher_read_value(client, FSCHER_REG_TEMP0_ACT);
- data->temp_act[1] = fscher_read_value(client, FSCHER_REG_TEMP1_ACT);
- data->temp_act[2] = fscher_read_value(client, FSCHER_REG_TEMP2_ACT);
- data->temp_status[0] = fscher_read_value(client, FSCHER_REG_TEMP0_STATE);
- data->temp_status[1] = fscher_read_value(client, FSCHER_REG_TEMP1_STATE);
- data->temp_status[2] = fscher_read_value(client, FSCHER_REG_TEMP2_STATE);
-
- data->volt[0] = fscher_read_value(client, FSCHER_REG_VOLT_12);
- data->volt[1] = fscher_read_value(client, FSCHER_REG_VOLT_5);
- data->volt[2] = fscher_read_value(client, FSCHER_REG_VOLT_BATT);
-
- data->fan_act[0] = fscher_read_value(client, FSCHER_REG_FAN0_ACT);
- data->fan_act[1] = fscher_read_value(client, FSCHER_REG_FAN1_ACT);
- data->fan_act[2] = fscher_read_value(client, FSCHER_REG_FAN2_ACT);
- data->fan_status[0] = fscher_read_value(client, FSCHER_REG_FAN0_STATE);
- data->fan_status[1] = fscher_read_value(client, FSCHER_REG_FAN1_STATE);
- data->fan_status[2] = fscher_read_value(client, FSCHER_REG_FAN2_STATE);
- data->fan_min[0] = fscher_read_value(client, FSCHER_REG_FAN0_MIN);
- data->fan_min[1] = fscher_read_value(client, FSCHER_REG_FAN1_MIN);
- data->fan_min[2] = fscher_read_value(client, FSCHER_REG_FAN2_MIN);
- data->fan_ripple[0] = fscher_read_value(client, FSCHER_REG_FAN0_RIPPLE);
- data->fan_ripple[1] = fscher_read_value(client, FSCHER_REG_FAN1_RIPPLE);
- data->fan_ripple[2] = fscher_read_value(client, FSCHER_REG_FAN2_RIPPLE);
-
- data->watchdog[0] = fscher_read_value(client, FSCHER_REG_WDOG_PRESET);
- data->watchdog[1] = fscher_read_value(client, FSCHER_REG_WDOG_STATE);
- data->watchdog[2] = fscher_read_value(client, FSCHER_REG_WDOG_CONTROL);
-
- data->global_event = fscher_read_value(client, FSCHER_REG_EVENT_STATE);
- data->global_control = fscher_read_value(client,
- FSCHER_REG_CONTROL);
-
- data->last_updated = jiffies;
- data->valid = 1;
- }
-
- mutex_unlock(&data->update_lock);
-
- return data;
-}
-
-
-
-#define FAN_INDEX_FROM_NUM(nr) ((nr) - 1)
-
-static ssize_t set_fan_status(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- /* bits 0..1, 3..7 reserved => mask with 0x04 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0x04;
-
- mutex_lock(&data->update_lock);
- data->fan_status[FAN_INDEX_FROM_NUM(nr)] &= ~v;
- fscher_write_value(client, reg, v);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_fan_status(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 0..1, 3..7 reserved => mask with 0x04 */
- return sprintf(buf, "%u\n", data->fan_status[FAN_INDEX_FROM_NUM(nr)] & 0x04);
-}
-
-static ssize_t set_pwm(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10);
-
- mutex_lock(&data->update_lock);
- data->fan_min[FAN_INDEX_FROM_NUM(nr)] = v > 0xff ? 0xff : v;
- fscher_write_value(client, reg, data->fan_min[FAN_INDEX_FROM_NUM(nr)]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_pwm(struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", data->fan_min[FAN_INDEX_FROM_NUM(nr)]);
-}
-
-static ssize_t set_fan_div(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- /* supported values: 2, 4, 8 */
- unsigned long v = simple_strtoul(buf, NULL, 10);
-
- switch (v) {
- case 2: v = 1; break;
- case 4: v = 2; break;
- case 8: v = 3; break;
- default:
- dev_err(&client->dev, "fan_div value %ld not "
- "supported. Choose one of 2, 4 or 8!\n", v);
- return -EINVAL;
- }
-
- mutex_lock(&data->update_lock);
-
- /* bits 2..7 reserved => mask with 0x03 */
- data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] &= ~0x03;
- data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] |= v;
-
- fscher_write_value(client, reg, data->fan_ripple[FAN_INDEX_FROM_NUM(nr)]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_fan_div(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 2..7 reserved => mask with 0x03 */
- return sprintf(buf, "%u\n", 1 << (data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] & 0x03));
-}
-
-#define RPM_FROM_REG(val) (val*60)
-
-static ssize_t show_fan_input (struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", RPM_FROM_REG(data->fan_act[FAN_INDEX_FROM_NUM(nr)]));
-}
-
-
-
-#define TEMP_INDEX_FROM_NUM(nr) ((nr) - 1)
-
-static ssize_t set_temp_status(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- /* bits 2..7 reserved, 0 read only => mask with 0x02 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02;
-
- mutex_lock(&data->update_lock);
- data->temp_status[TEMP_INDEX_FROM_NUM(nr)] &= ~v;
- fscher_write_value(client, reg, v);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_temp_status(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 2..7 reserved => mask with 0x03 */
- return sprintf(buf, "%u\n", data->temp_status[TEMP_INDEX_FROM_NUM(nr)] & 0x03);
-}
-
-#define TEMP_FROM_REG(val) (((val) - 128) * 1000)
-
-static ssize_t show_temp_input(struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_act[TEMP_INDEX_FROM_NUM(nr)]));
-}
-
-/*
- * The final conversion is specified in sensors.conf, as it depends on
- * mainboard specific values. We export the registers contents as
- * pseudo-hundredths-of-Volts (range 0V - 2.55V). Not that it makes much
- * sense per se, but it minimizes the conversions count and keeps the
- * values within a usual range.
- */
-#define VOLT_FROM_REG(val) ((val) * 10)
-
-static ssize_t show_in_input(struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[nr]));
-}
-
-
-
-static ssize_t show_revision(struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", data->revision);
-}
-
-
-
-static ssize_t show_alarms(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 2, 5..6 reserved => mask with 0x9b */
- return sprintf(buf, "%u\n", data->global_event & 0x9b);
-}
-
-
-
-static ssize_t set_control(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- /* bits 1..7 reserved => mask with 0x01 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0x01;
-
- mutex_lock(&data->update_lock);
- data->global_control = v;
- fscher_write_value(client, reg, v);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_control(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 1..7 reserved => mask with 0x01 */
- return sprintf(buf, "%u\n", data->global_control & 0x01);
-}
-
-
-
-static ssize_t set_watchdog_control(struct i2c_client *client, struct
- fscher_data *data, const char *buf, size_t count,
- int nr, int reg)
-{
- /* bits 0..3 reserved => mask with 0xf0 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0xf0;
-
- mutex_lock(&data->update_lock);
- data->watchdog[2] &= ~0xf0;
- data->watchdog[2] |= v;
- fscher_write_value(client, reg, data->watchdog[2]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_watchdog_control(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 0..3 reserved, bit 5 write only => mask with 0xd0 */
- return sprintf(buf, "%u\n", data->watchdog[2] & 0xd0);
-}
-
-static ssize_t set_watchdog_status(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- /* bits 0, 2..7 reserved => mask with 0x02 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02;
-
- mutex_lock(&data->update_lock);
- data->watchdog[1] &= ~v;
- fscher_write_value(client, reg, v);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_watchdog_status(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 0, 2..7 reserved => mask with 0x02 */
- return sprintf(buf, "%u\n", data->watchdog[1] & 0x02);
-}
-
-static ssize_t set_watchdog_preset(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0xff;
-
- mutex_lock(&data->update_lock);
- data->watchdog[0] = v;
- fscher_write_value(client, reg, data->watchdog[0]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_watchdog_preset(struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", data->watchdog[0]);
-}
-
-static int __init sensors_fscher_init(void)
-{
- return i2c_add_driver(&fscher_driver);
-}
-
-static void __exit sensors_fscher_exit(void)
-{
- i2c_del_driver(&fscher_driver);
-}
-
-MODULE_AUTHOR("Reinhard Nissl <rnissl@gmx.de>");
-MODULE_DESCRIPTION("FSC Hermes driver");
-MODULE_LICENSE("GPL");
-
-module_init(sensors_fscher_init);
-module_exit(sensors_fscher_exit);
diff --git a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c
deleted file mode 100644
index 8a7bcf500b4..00000000000
--- a/drivers/hwmon/fscpos.c
+++ /dev/null
@@ -1,654 +0,0 @@
-/*
- fscpos.c - Kernel module for hardware monitoring with FSC Poseidon chips
- Copyright (C) 2004, 2005 Stefan Ott <stefan@desire.ch>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-/*
- fujitsu siemens poseidon chip,
- module based on the old fscpos module by Hermann Jung <hej@odn.de> and
- the fscher module by Reinhard Nissl <rnissl@gmx.de>
-
- original module based on lm80.c
- Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
- and Philip Edelbrock <phil@netroedge.com>
-
- Thanks to Jean Delvare for reviewing my code and suggesting a lot of
- improvements.
-*/
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/i2c.h>
-#include <linux/init.h>
-#include <linux/hwmon.h>
-#include <linux/err.h>
-#include <linux/mutex.h>
-#include <linux/sysfs.h>
-
-/*
- * Addresses to scan
- */
-static const unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END };
-
-/*
- * Insmod parameters
- */
-I2C_CLIENT_INSMOD_1(fscpos);
-
-/*
- * The FSCPOS registers
- */
-
-/* chip identification */
-#define FSCPOS_REG_IDENT_0 0x00
-#define FSCPOS_REG_IDENT_1 0x01
-#define FSCPOS_REG_IDENT_2 0x02
-#define FSCPOS_REG_REVISION 0x03
-
-/* global control and status */
-#define FSCPOS_REG_EVENT_STATE 0x04
-#define FSCPOS_REG_CONTROL 0x05
-
-/* watchdog */
-#define FSCPOS_REG_WDOG_PRESET 0x28
-#define FSCPOS_REG_WDOG_STATE 0x23
-#define FSCPOS_REG_WDOG_CONTROL 0x21
-
-/* voltages */
-#define FSCPOS_REG_VOLT_12 0x45
-#define FSCPOS_REG_VOLT_5 0x42
-#define FSCPOS_REG_VOLT_BATT 0x48
-
-/* fans - the chip does not support minimum speed for fan2 */
-static u8 FSCPOS_REG_PWM[] = { 0x55, 0x65 };
-static u8 FSCPOS_REG_FAN_ACT[] = { 0x0e, 0x6b, 0xab };
-static u8 FSCPOS_REG_FAN_STATE[] = { 0x0d, 0x62, 0xa2 };
-static u8 FSCPOS_REG_FAN_RIPPLE[] = { 0x0f, 0x6f, 0xaf };
-
-/* temperatures */
-static u8 FSCPOS_REG_TEMP_ACT[] = { 0x64, 0x32, 0x35 };
-static u8 FSCPOS_REG_TEMP_STATE[] = { 0x71, 0x81, 0x91 };
-
-/*
- * Functions declaration
- */
-static int fscpos_probe(struct i2c_client *client,
- const struct i2c_device_id *id);
-static int fscpos_detect(struct i2c_client *client, int kind,
- struct i2c_board_info *info);
-static int fscpos_remove(struct i2c_client *client);
-
-static int fscpos_read_value(struct i2c_client *client, u8 reg);
-static int fscpos_write_value(struct i2c_client *client, u8 reg, u8 value);
-static struct fscpos_data *fscpos_update_device(struct device *dev);
-static void fscpos_init_client(struct i2c_client *client);
-
-static void reset_fan_alarm(struct i2c_client *client, int nr);
-
-/*
- * Driver data (common to all clients)
- */
-static const struct i2c_device_id fscpos_id[] = {
- { "fscpos", fscpos },
- { }
-};
-
-static struct i2c_driver fscpos_driver = {
- .class = I2C_CLASS_HWMON,
- .driver = {
- .name = "fscpos",
- },
- .probe = fscpos_probe,
- .remove = fscpos_remove,
- .id_table = fscpos_id,
- .detect = fscpos_detect,
- .address_data = &addr_data,
-};
-
-/*
- * Client data (each client gets its own)
- */
-struct fscpos_data {
- struct device *hwmon_dev;
- struct mutex update_lock;
- char valid; /* 0 until following fields are valid */
- unsigned long last_updated; /* In jiffies */
-
- /* register values */
- u8 revision; /* revision of chip */
- u8 global_event; /* global event status */
- u8 global_control; /* global control register */
- u8 wdog_control; /* watchdog control */
- u8 wdog_state; /* watchdog status */
- u8 wdog_preset; /* watchdog preset */
- u8 volt[3]; /* 12, 5, battery current */
- u8 temp_act[3]; /* temperature */
- u8 temp_status[3]; /* status of sensor */
- u8 fan_act[3]; /* fans revolutions per second */
- u8 fan_status[3]; /* fan status */
- u8 pwm[2]; /* fan min value for rps */
- u8 fan_ripple[3]; /* divider for rps */
-};
-
-/* Temperature */
-#define TEMP_FROM_REG(val) (((val) - 128) * 1000)
-
-static ssize_t show_temp_input(struct fscpos_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_act[nr - 1]));
-}
-
-static ssize_t show_temp_status(struct fscpos_data *data, char *buf, int nr)
-{
- /* bits 2..7 reserved => mask with 0x03 */
- return sprintf(buf, "%u\n", data->temp_status[nr - 1] & 0x03);
-}
-
-static ssize_t show_temp_reset(struct fscpos_data *data, char *buf, int nr)
-{
- return sprintf(buf, "1\n");
-}
-
-static ssize_t set_temp_reset(struct i2c_client *client, struct fscpos_data
- *data, const char *buf, size_t count, int nr, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10);
- if (v != 1) {
- dev_err(&client->dev, "temp_reset value %ld not supported. "
- "Use 1 to reset the alarm!\n", v);
- return -EINVAL;
- }
-
- dev_info(&client->dev, "You used the temp_reset feature which has not "
- "been proplerly tested. Please report your "
- "experience to the module author.\n");
-
- /* Supported value: 2 (clears the status) */
- fscpos_write_value(client, FSCPOS_REG_TEMP_STATE[nr - 1], 2);
- return count;
-}
-
-/* Fans */
-#define RPM_FROM_REG(val) ((val) * 60)
-
-static ssize_t show_fan_status(struct fscpos_data *data, char *buf, int nr)
-{
- /* bits 0..1, 3..7 reserved => mask with 0x04 */
- return sprintf(buf, "%u\n", data->fan_status[nr - 1] & 0x04);
-}
-
-static ssize_t show_fan_input(struct fscpos_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", RPM_FROM_REG(data->fan_act[nr - 1]));
-}
-
-static ssize_t show_fan_ripple(struct fscpos_data *data, char *buf, int nr)
-{
- /* bits 2..7 reserved => mask with 0x03 */
- return sprintf(buf, "%u\n", data->fan_ripple[nr - 1] & 0x03);
-}
-
-static ssize_t set_fan_ripple(struct i2c_client *client, struct fscpos_data
- *data, const char *buf, size_t count, int nr, int reg)
-{
- /* supported values: 2, 4, 8 */
- unsigned long v = simple_strtoul(buf, NULL, 10);
-
- switch (v) {
- case 2: v = 1; break;
- case 4: v = 2; break;
- case 8: v = 3; break;
- default:
- dev_err(&client->dev, "fan_ripple value %ld not supported. "
- "Must be one of 2, 4 or 8!\n", v);
- return -EINVAL;
- }
-
- mutex_lock(&data->update_lock);
- /* bits 2..7 reserved => mask with 0x03 */
- data->fan_ripple[nr - 1] &= ~0x03;
- data->fan_ripple[nr - 1] |= v;
-
- fscpos_write_value(client, reg, data->fan_ripple[nr - 1]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_pwm(struct fscpos_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", data->pwm[nr - 1]);
-}
-
-static ssize_t set_pwm(struct i2c_client *client, struct fscpos_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10);
-
- /* Range: 0..255 */
- if (v < 0) v = 0;
- if (v > 255) v = 255;
-
- mutex_lock(&data->update_lock);
- data->pwm[nr - 1] = v;
- fscpos_write_value(client, reg, data->pwm[nr - 1]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static void reset_fan_alarm(struct i2c_client *client, int nr)
-{
- fscpos_write_value(client, FSCPOS_REG_FAN_STATE[nr], 4);
-}
-
-/* Volts */
-#define VOLT_FROM_REG(val, mult) ((val) * (mult) / 255)
-
-static ssize_t show_volt_12(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct fscpos_data *data = fscpos_update_device(dev);
- return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[0], 14200));
-}
-
-static ssize_t show_volt_5(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct fscpos_data *data = fscpos_update_device(dev);
- return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[1], 6600));
-}
-
-static ssize_t show_volt_batt(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct fscpos_data *data = fscpos_update_device(dev);
- return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[2], 3300));
-}
-
-/* Watchdog */
-static ssize_t show_wdog_control(struct fscpos_data *data, char *buf)
-{
- /* bits 0..3 reserved, bit 6 write only => mask with 0xb0 */
- return sprintf(buf, "%u\n", data->wdog_control & 0xb0);
-}
-
-static ssize_t set_wdog_control(struct i2c_client *client, struct fscpos_data
- *data, const char *buf, size_t count, int reg)
-{
- /* bits 0..3 reserved => mask with 0xf0 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0xf0;
-
- mutex_lock(&data->update_lock);
- data->wdog_control &= ~0xf0;
- data->wdog_control |= v;
- fscpos_write_value(client, reg, data->wdog_control);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_wdog_state(struct fscpos_data *data, char *buf)
-{
- /* bits 0, 2..7 reserved => mask with 0x02 */
- return sprintf(buf, "%u\n", data->wdog_state & 0x02);
-}
-
-static ssize_t set_wdog_state(struct i2c_client *client, struct fscpos_data
- *data, const char *buf, size_t count, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02;
-
- /* Valid values: 2 (clear) */
- if (v != 2) {
- dev_err(&client->dev, "wdog_state value %ld not supported. "
- "Must be 2 to clear the state!\n", v);
- return -EINVAL;
- }
-
- mutex_lock(&data->update_lock);
- data->wdog_state &= ~v;
- fscpos_write_value(client, reg, v);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_wdog_preset(struct fscpos_data *data, char *buf)
-{
- return sprintf(buf, "%u\n", data->wdog_preset);
-}
-
-static ssize_t set_wdog_preset(struct i2c_client *client, struct fscpos_data
- *data, const char *buf, size_t count, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0xff;
-
- mutex_lock(&data->update_lock);
- data->wdog_preset = v;
- fscpos_write_value(client, reg, data->wdog_preset);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-/* Event */
-static ssize_t show_event(struct device *dev, struct device_attribute *attr, char *buf)
-{
- /* bits 5..7 reserved => mask with 0x1f */
- struct fscpos_data *data = fscpos_update_device(dev);
- return sprintf(buf, "%u\n", data->global_event & 0x9b);
-}
-
-/*
- * Sysfs stuff
- */
-#define create_getter(kind, sub) \
- static ssize_t sysfs_show_##kind##sub(struct device *dev, struct device_attribute *attr, char *buf) \
- { \
- struct fscpos_data *data = fscpos_update_device(dev); \
- return show_##kind##sub(data, buf); \
- }
-
-#define create_getter_n(kind, offset, sub) \
- static ssize_t sysfs_show_##kind##offset##sub(struct device *dev, struct device_attribute *attr, char\
- *buf) \
- { \
- struct fscpos_data *data = fscpos_update_device(dev); \
- return show_##kind##sub(data, buf, offset); \
- }
-
-#define create_setter(kind, sub, reg) \
- static ssize_t sysfs_set_##kind##sub (struct device *dev, struct device_attribute *attr, const char \
- *buf, size_t count) \
- { \
- struct i2c_client *client = to_i2c_client(dev); \
- struct fscpos_data *data = i2c_get_clientdata(client); \
- return set_##kind##sub(client, data, buf, count, reg); \
- }
-
-#define create_setter_n(kind, offset, sub, reg) \
- static ssize_t sysfs_set_##kind##offset##sub (struct device *dev, struct device_attribute *attr, \
- const char *buf, size_t count) \
- { \
- struct i2c_client *client = to_i2c_client(dev); \
- struct fscpos_data *data = i2c_get_clientdata(client); \
- return set_##kind##sub(client, data, buf, count, offset, reg);\
- }
-
-#define create_sysfs_device_ro(kind, sub, offset) \
- static DEVICE_ATTR(kind##offset##sub, S_IRUGO, \
- sysfs_show_##kind##offset##sub, NULL);
-
-#define create_sysfs_device_rw(kind, sub, offset) \
- static DEVICE_ATTR(kind##offset##sub, S_IRUGO | S_IWUSR, \
- sysfs_show_##kind##offset##sub, sysfs_set_##kind##offset##sub);
-
-#define sysfs_ro_n(kind, sub, offset) \
- create_getter_n(kind, offset, sub); \
- create_sysfs_device_ro(kind, sub, offset);
-
-#define sysfs_rw_n(kind, sub, offset, reg) \
- create_getter_n(kind, offset, sub); \
- create_setter_n(kind, offset, sub, reg); \
- create_sysfs_device_rw(kind, sub, offset);
-
-#define sysfs_rw(kind, sub, reg) \
- create_getter(kind, sub); \
- create_setter(kind, sub, reg); \
- create_sysfs_device_rw(kind, sub,);
-
-#define sysfs_fan_with_min(offset, reg_status, reg_ripple, reg_min) \
- sysfs_fan(offset, reg_status, reg_ripple); \
- sysfs_rw_n(pwm,, offset, reg_min);
-
-#define sysfs_fan(offset, reg_status, reg_ripple) \
- sysfs_ro_n(fan, _input, offset); \
- sysfs_ro_n(fan, _status, offset); \
- sysfs_rw_n(fan, _ripple, offset, reg_ripple);
-
-#define sysfs_temp(offset, reg_status) \
- sysfs_ro_n(temp, _input, offset); \
- sysfs_ro_n(temp, _status, offset); \
- sysfs_rw_n(temp, _reset, offset, reg_status);
-
-#define sysfs_watchdog(reg_wdog_preset, reg_wdog_state, reg_wdog_control) \
- sysfs_rw(wdog, _control, reg_wdog_control); \
- sysfs_rw(wdog, _preset, reg_wdog_preset); \
- sysfs_rw(wdog, _state, reg_wdog_state);
-
-sysfs_fan_with_min(1, FSCPOS_REG_FAN_STATE[0], FSCPOS_REG_FAN_RIPPLE[0],
- FSCPOS_REG_PWM[0]);
-sysfs_fan_with_min(2, FSCPOS_REG_FAN_STATE[1], FSCPOS_REG_FAN_RIPPLE[1],
- FSCPOS_REG_PWM[1]);
-sysfs_fan(3, FSCPOS_REG_FAN_STATE[2], FSCPOS_REG_FAN_RIPPLE[2]);
-
-sysfs_temp(1, FSCPOS_REG_TEMP_STATE[0]);
-sysfs_temp(2, FSCPOS_REG_TEMP_STATE[1]);
-sysfs_temp(3, FSCPOS_REG_TEMP_STATE[2]);
-
-sysfs_watchdog(FSCPOS_REG_WDOG_PRESET, FSCPOS_REG_WDOG_STATE,
- FSCPOS_REG_WDOG_CONTROL);
-
-static DEVICE_ATTR(event, S_IRUGO, show_event, NULL);
-static DEVICE_ATTR(in0_input, S_IRUGO, show_volt_12, NULL);
-static DEVICE_ATTR(in1_input, S_IRUGO, show_volt_5, NULL);
-static DEVICE_ATTR(in2_input, S_IRUGO, show_volt_batt, NULL);
-
-static struct attribute *fscpos_attributes[] = {
- &dev_attr_event.attr,
- &dev_attr_in0_input.attr,
- &dev_attr_in1_input.attr,
- &dev_attr_in2_input.attr,
-
- &dev_attr_wdog_control.attr,
- &dev_attr_wdog_preset.attr,
- &dev_attr_wdog_state.attr,
-
- &dev_attr_temp1_input.attr,
- &dev_attr_temp1_status.attr,
- &dev_attr_temp1_reset.attr,
- &dev_attr_temp2_input.attr,
- &dev_attr_temp2_status.attr,
- &dev_attr_temp2_reset.attr,
- &dev_attr_temp3_input.attr,
- &dev_attr_temp3_status.attr,
- &dev_attr_temp3_reset.attr,
-
- &dev_attr_fan1_input.attr,
- &dev_attr_fan1_status.attr,
- &dev_attr_fan1_ripple.attr,
- &dev_attr_pwm1.attr,
- &dev_attr_fan2_input.attr,
- &dev_attr_fan2_status.attr,
- &dev_attr_fan2_ripple.attr,
- &dev_attr_pwm2.attr,
- &dev_attr_fan3_input.attr,
- &dev_attr_fan3_status.attr,
- &dev_attr_fan3_ripple.attr,
- NULL
-};
-
-static const struct attribute_group fscpos_group = {
- .attrs = fscpos_attributes,
-};
-
-/* Return 0 if detection is successful, -ENODEV otherwise */
-static int fscpos_detect(struct i2c_client *new_client, int kind,
- struct i2c_board_info *info)
-{
- struct i2c_adapter *adapter = new_client->adapter;
-
- if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
- return -ENODEV;
-
- /* Do the remaining detection unless force or force_fscpos parameter */
- if (kind < 0) {
- if ((fscpos_read_value(new_client, FSCPOS_REG_IDENT_0)
- != 0x50) /* 'P' */
- || (fscpos_read_value(new_client, FSCPOS_REG_IDENT_1)
- != 0x45) /* 'E' */
- || (fscpos_read_value(new_client, FSCPOS_REG_IDENT_2)
- != 0x47))/* 'G' */
- return -ENODEV;
- }
-
- strlcpy(info->type, "fscpos", I2C_NAME_SIZE);
-
- return 0;
-}
-
-static int fscpos_probe(struct i2c_client *new_client,
- const struct i2c_device_id *id)
-{
- struct fscpos_data *data;
- int err;
-
- data = kzalloc(sizeof(struct fscpos_data), GFP_KERNEL);
- if (!data) {
- err = -ENOMEM;
- goto exit;
- }
-
- i2c_set_clientdata(new_client, data);
- data->valid = 0;
- mutex_init(&data->update_lock);
-
- /* Inizialize the fscpos chip */
- fscpos_init_client(new_client);
-
- /* Announce that the chip was found */
- dev_info(&new_client->dev, "Found fscpos chip, rev %u\n", data->revision);
-
- /* Register sysfs hooks */
- if ((err = sysfs_create_group(&new_client->dev.kobj, &fscpos_group)))
- goto exit_free;
-
- data->hwmon_dev = hwmon_device_register(&new_client->dev);
- if (IS_ERR(data->hwmon_dev)) {
- err = PTR_ERR(data->hwmon_dev);
- goto exit_remove_files;
- }
-
- return 0;
-
-exit_remove_files:
- sysfs_remove_group(&new_client->dev.kobj, &fscpos_group);
-exit_free:
- kfree(data);
-exit:
- return err;
-}
-
-static int fscpos_remove(struct i2c_client *client)
-{
- struct fscpos_data *data = i2c_get_clientdata(client);
-
- hwmon_device_unregister(data->hwmon_dev);
- sysfs_remove_group(&client->dev.kobj, &fscpos_group);
-
- kfree(data);
- return 0;
-}
-
-static int fscpos_read_value(struct i2c_client *client, u8 reg)
-{
- dev_dbg(&client->dev, "Read reg 0x%02x\n", reg);
- return i2c_smbus_read_byte_data(client, reg);
-}
-
-static int fscpos_write_value(struct i2c_client *client, u8 reg, u8 value)
-{
- dev_dbg(&client->dev, "Write reg 0x%02x, val 0x%02x\n", reg, value);
- return i2c_smbus_write_byte_data(client, reg, value);
-}
-
-/* Called when we have found a new FSCPOS chip */
-static void fscpos_init_client(struct i2c_client *client)
-{
- struct fscpos_data *data = i2c_get_clientdata(client);
-
- /* read revision from chip */
- data->revision = fscpos_read_value(client, FSCPOS_REG_REVISION);
-}
-
-static struct fscpos_data *fscpos_update_device(struct device *dev)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct fscpos_data *data = i2c_get_clientdata(client);
-
- mutex_lock(&data->update_lock);
-
- if (time_after(jiffies, data->last_updated + 2 * HZ) || !data->valid) {
- int i;
-
- dev_dbg(&client->dev, "Starting fscpos update\n");
-
- for (i = 0; i < 3; i++) {
- data->temp_act[i] = fscpos_read_value(client,
- FSCPOS_REG_TEMP_ACT[i]);
- data->temp_status[i] = fscpos_read_value(client,
- FSCPOS_REG_TEMP_STATE[i]);
- data->fan_act[i] = fscpos_read_value(client,
- FSCPOS_REG_FAN_ACT[i]);
- data->fan_status[i] = fscpos_read_value(client,
- FSCPOS_REG_FAN_STATE[i]);
- data->fan_ripple[i] = fscpos_read_value(client,
- FSCPOS_REG_FAN_RIPPLE[i]);
- if (i < 2) {
- /* fan2_min is not supported by the chip */
- data->pwm[i] = fscpos_read_value(client,
- FSCPOS_REG_PWM[i]);
- }
- /* reset fan status if speed is back to > 0 */
- if (data->fan_status[i] != 0 && data->fan_act[i] > 0) {
- reset_fan_alarm(client, i);
- }
- }
-
- data->volt[0] = fscpos_read_value(client, FSCPOS_REG_VOLT_12);
- data->volt[1] = fscpos_read_value(client, FSCPOS_REG_VOLT_5);
- data->volt[2] = fscpos_read_value(client, FSCPOS_REG_VOLT_BATT);
-
- data->wdog_preset = fscpos_read_value(client,
- FSCPOS_REG_WDOG_PRESET);
- data->wdog_state = fscpos_read_value(client,
- FSCPOS_REG_WDOG_STATE);
- data->wdog_control = fscpos_read_value(client,
- FSCPOS_REG_WDOG_CONTROL);
-
- data->global_event = fscpos_read_value(client,
- FSCPOS_REG_EVENT_STATE);
-
- data->last_updated = jiffies;
- data->valid = 1;
- }
- mutex_unlock(&data->update_lock);
- return data;
-}
-
-static int __init sm_fscpos_init(void)
-{
- return i2c_add_driver(&fscpos_driver);
-}
-
-static void __exit sm_fscpos_exit(void)
-{
- i2c_del_driver(&fscpos_driver);
-}
-
-MODULE_AUTHOR("Stefan Ott <stefan@desire.ch> based on work from Hermann Jung "
- "<hej@odn.de>, Frodo Looijaard <frodol@dds.nl>"
- " and Philip Edelbrock <phil@netroedge.com>");
-MODULE_DESCRIPTION("fujitsu siemens poseidon chip driver");
-MODULE_LICENSE("GPL");
-
-module_init(sm_fscpos_init);
-module_exit(sm_fscpos_exit);
diff --git a/drivers/hwmon/ltc4215.c b/drivers/hwmon/ltc4215.c
index 9386e2a3921..6c9a04136e0 100644
--- a/drivers/hwmon/ltc4215.c
+++ b/drivers/hwmon/ltc4215.c
@@ -259,7 +259,7 @@ static int ltc4215_probe(struct i2c_client *client,
mutex_init(&data->update_lock);
/* Initialize the LTC4215 chip */
- /* TODO */
+ i2c_smbus_write_byte_data(client, LTC4215_FAULT, 0x00);
/* Register sysfs hooks */
ret = sysfs_create_group(&client->dev.kobj, &ltc4215_group);
diff --git a/drivers/hwmon/ltc4245.c b/drivers/hwmon/ltc4245.c
index 034b2c51584..e3896433361 100644
--- a/drivers/hwmon/ltc4245.c
+++ b/drivers/hwmon/ltc4245.c
@@ -382,7 +382,8 @@ static int ltc4245_probe(struct i2c_client *client,
mutex_init(&data->update_lock);
/* Initialize the LTC4245 chip */
- /* TODO */
+ i2c_smbus_write_byte_data(client, LTC4245_FAULT1, 0x00);
+ i2c_smbus_write_byte_data(client, LTC4245_FAULT2, 0x00);
/* Register sysfs hooks */
ret = sysfs_create_group(&client->dev.kobj, &ltc4245_group);
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c
index 949c97ff57e..1f20a042a4f 100644
--- a/drivers/idle/i7300_idle.c
+++ b/drivers/idle/i7300_idle.c
@@ -29,8 +29,8 @@
#include <asm/idle.h>
-#include "../dma/ioatdma_hw.h"
-#include "../dma/ioatdma_registers.h"
+#include "../dma/ioat/hw.h"
+#include "../dma/ioat/registers.h"
#define I7300_IDLE_DRIVER_VERSION "1.55"
#define I7300_PRINT "i7300_idle:"
@@ -126,9 +126,9 @@ static void i7300_idle_ioat_stop(void)
udelay(10);
sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
- IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+ IOAT_CHANSTS_STATUS;
- if (sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE)
+ if (sts != IOAT_CHANSTS_ACTIVE)
break;
}
@@ -160,9 +160,9 @@ static int __init i7300_idle_ioat_selftest(u8 *ctl,
udelay(1000);
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
- IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+ IOAT_CHANSTS_STATUS;
- if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE) {
+ if (chan_sts != IOAT_CHANSTS_DONE) {
/* Not complete, reset the channel */
writeb(IOAT_CHANCMD_RESET,
ioat_chanbase + IOAT1_CHANCMD_OFFSET);
@@ -288,9 +288,9 @@ static void __exit i7300_idle_ioat_exit(void)
ioat_chanbase + IOAT1_CHANCMD_OFFSET);
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
- IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+ IOAT_CHANSTS_STATUS;
- if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) {
+ if (chan_sts != IOAT_CHANSTS_ACTIVE) {
writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET);
break;
}
@@ -298,14 +298,14 @@ static void __exit i7300_idle_ioat_exit(void)
}
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
- IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+ IOAT_CHANSTS_STATUS;
/*
* We tried to reset multiple times. If IO A/T channel is still active
* flag an error and return without cleanup. Memory leak is better
* than random corruption in that extreme error situation.
*/
- if (chan_sts == IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) {
+ if (chan_sts == IOAT_CHANSTS_ACTIVE) {
printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels."
" Not freeing resources\n");
return;
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 556539d617a..e828aab7dac 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -11,6 +11,7 @@
*/
#include <linux/init.h>
+#include <linux/types.h>
#include <linux/input.h>
#include <linux/module.h>
#include <linux/random.h>
@@ -514,7 +515,7 @@ static void input_disconnect_device(struct input_dev *dev)
* that there are no threads in the middle of input_open_device()
*/
mutex_lock(&dev->mutex);
- dev->going_away = 1;
+ dev->going_away = true;
mutex_unlock(&dev->mutex);
spin_lock_irq(&dev->event_lock);
@@ -1259,10 +1260,71 @@ static int input_dev_uevent(struct device *device, struct kobj_uevent_env *env)
return 0;
}
+/*
+ * For event class @type (LED or SND): if the device advertises EV_<type>,
+ * walk every code from 0 to <type>_MAX inclusive and, for each code that is
+ * both supported (dev-><bits>bit) and currently set (dev-><bits>), call the
+ * driver's event() callback with value @on.
+ *
+ * The bound is inclusive because <type>_MAX is itself a valid bit index in
+ * the bitmaps (they hold <type>_MAX + 1 bits).
+ */
+#define INPUT_DO_TOGGLE(dev, type, bits, on)				\
+	do {								\
+		int i;							\
+		if (!test_bit(EV_##type, (dev)->evbit))			\
+			break;						\
+		for (i = 0; i <= type##_MAX; i++) {			\
+			if (!test_bit(i, (dev)->bits##bit) ||		\
+			    !test_bit(i, (dev)->bits))			\
+				continue;				\
+			(dev)->event((dev), EV_##type, i, (on));	\
+		}							\
+	} while (0)
+
+/*
+ * Replay LED and sound state to the hardware via the driver's event()
+ * callback, passing @activate as the value.  When @activate is true and the
+ * device supports EV_REP, the stored repeat period and delay are sent too.
+ * Devices without an event() callback are left untouched.
+ */
+static void input_dev_reset(struct input_dev *dev, bool activate)
+{
+	if (dev->event) {
+		INPUT_DO_TOGGLE(dev, LED, led, activate);
+		INPUT_DO_TOGGLE(dev, SND, snd, activate);
+
+		if (!activate || !test_bit(EV_REP, dev->evbit))
+			return;
+
+		dev->event(dev, EV_REP, REP_PERIOD, dev->rep[REP_PERIOD]);
+		dev->event(dev, EV_REP, REP_DELAY, dev->rep[REP_DELAY]);
+	}
+}
+
+#ifdef CONFIG_PM
+/*
+ * Suspend/poweroff callback: under the device mutex, send value "false" for
+ * every LED and sound that is currently set.  Always returns 0.
+ */
+static int input_dev_suspend(struct device *dev)
+{
+	struct input_dev *idev = to_input_dev(dev);
+	struct mutex *lock = &idev->mutex;
+
+	mutex_lock(lock);
+	input_dev_reset(idev, false);
+	mutex_unlock(lock);
+
+	return 0;
+}
+
+/*
+ * Resume/restore callback: under the device mutex, re-send the state of the
+ * LEDs and sounds that are set, plus the autorepeat settings.  Always
+ * returns 0.
+ */
+static int input_dev_resume(struct device *dev)
+{
+	struct input_dev *idev = to_input_dev(dev);
+	struct mutex *lock = &idev->mutex;
+
+	mutex_lock(lock);
+	input_dev_reset(idev, true);
+	mutex_unlock(lock);
+
+	return 0;
+}
+
+/*
+ * PM callbacks for input devices.  poweroff reuses the suspend handler and
+ * restore reuses the resume handler.
+ */
+static const struct dev_pm_ops input_dev_pm_ops = {
+ .suspend = input_dev_suspend,
+ .resume = input_dev_resume,
+ .poweroff = input_dev_suspend,
+ .restore = input_dev_resume,
+};
+#endif /* CONFIG_PM */
+
static struct device_type input_dev_type = {
.groups = input_dev_attr_groups,
.release = input_dev_release,
.uevent = input_dev_uevent,
+#ifdef CONFIG_PM
+ .pm = &input_dev_pm_ops,
+#endif
};
static char *input_devnode(struct device *dev, mode_t *mode)
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 3525c19be42..ee98b1bc5d8 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -24,6 +24,16 @@ config KEYBOARD_AAED2000
To compile this driver as a module, choose M here: the
module will be called aaed2000_kbd.
+config KEYBOARD_ADP5588
+ tristate "ADP5588 I2C QWERTY Keypad and IO Expander"
+ depends on I2C
+ help
+ Say Y here if you want to use an ADP5588 attached to your
+ system I2C bus.
+
+ To compile this driver as a module, choose M here: the
+ module will be called adp5588-keys.
+
config KEYBOARD_AMIGA
tristate "Amiga keyboard"
depends on AMIGA
@@ -104,6 +114,16 @@ config KEYBOARD_ATKBD_RDI_KEYCODES
right-hand column will be interpreted as the key shown in the
left-hand column.
+config KEYBOARD_QT2160
+ tristate "Atmel AT42QT2160 Touch Sensor Chip"
+ depends on I2C && EXPERIMENTAL
+ help
+ If you say yes here you get support for Atmel AT42QT2160 Touch
+ Sensor chip as a keyboard input.
+
+ This driver can also be built as a module. If so, the module
+ will be called qt2160.
+
config KEYBOARD_BFIN
tristate "Blackfin BF54x keypad support"
depends on (BF54x && !BF544)
@@ -251,6 +271,17 @@ config KEYBOARD_MAPLE
To compile this driver as a module, choose M here: the
module will be called maple_keyb.
+config KEYBOARD_MAX7359
+ tristate "Maxim MAX7359 Key Switch Controller"
+ depends on I2C
+ help
+ If you say yes here you get support for the Maxim MAX7359 Key
+ Switch Controller chip. This provides microprocessors with
+ management of up to 64 key switches.
+
+ To compile this driver as a module, choose M here: the
+ module will be called max7359_keypad.
+
config KEYBOARD_NEWTON
tristate "Newton keyboard"
select SERIO
@@ -260,6 +291,15 @@ config KEYBOARD_NEWTON
To compile this driver as a module, choose M here: the
module will be called newtonkbd.
+config KEYBOARD_OPENCORES
+ tristate "OpenCores Keyboard Controller"
+ help
+ Say Y here if you want to use the OpenCores Keyboard Controller
+ http://www.opencores.org/project,keyboardcontroller
+
+ To compile this driver as a module, choose M here; the
+ module will be called opencores-kbd.
+
config KEYBOARD_PXA27x
tristate "PXA27x/PXA3xx keypad support"
depends on PXA27x || PXA3xx
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index 8a7a22b3026..babad5e58b7 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -5,6 +5,7 @@
# Each configuration option enables a list of files.
obj-$(CONFIG_KEYBOARD_AAED2000) += aaed2000_kbd.o
+obj-$(CONFIG_KEYBOARD_ADP5588) += adp5588-keys.o
obj-$(CONFIG_KEYBOARD_AMIGA) += amikbd.o
obj-$(CONFIG_KEYBOARD_ATARI) += atakbd.o
obj-$(CONFIG_KEYBOARD_ATKBD) += atkbd.o
@@ -21,10 +22,13 @@ obj-$(CONFIG_KEYBOARD_LM8323) += lm8323.o
obj-$(CONFIG_KEYBOARD_LOCOMO) += locomokbd.o
obj-$(CONFIG_KEYBOARD_MAPLE) += maple_keyb.o
obj-$(CONFIG_KEYBOARD_MATRIX) += matrix_keypad.o
+obj-$(CONFIG_KEYBOARD_MAX7359) += max7359_keypad.o
obj-$(CONFIG_KEYBOARD_NEWTON) += newtonkbd.o
obj-$(CONFIG_KEYBOARD_OMAP) += omap-keypad.o
+obj-$(CONFIG_KEYBOARD_OPENCORES) += opencores-kbd.o
obj-$(CONFIG_KEYBOARD_PXA27x) += pxa27x_keypad.o
obj-$(CONFIG_KEYBOARD_PXA930_ROTARY) += pxa930_rotary.o
+obj-$(CONFIG_KEYBOARD_QT2160) += qt2160.o
obj-$(CONFIG_KEYBOARD_SH_KEYSC) += sh_keysc.o
obj-$(CONFIG_KEYBOARD_SPITZ) += spitzkbd.o
obj-$(CONFIG_KEYBOARD_STOWAWAY) += stowaway.o
diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c
new file mode 100644
index 00000000000..d48c808d592
--- /dev/null
+++ b/drivers/input/keyboard/adp5588-keys.c
@@ -0,0 +1,361 @@
+/*
+ * File: drivers/input/keyboard/adp5588-keys.c
+ * Description: keypad driver for ADP5588 I2C QWERTY Keypad and IO Expander
+ * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Copyright (C) 2008-2009 Analog Devices Inc.
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/pm.h>
+#include <linux/platform_device.h>
+#include <linux/input.h>
+#include <linux/i2c.h>
+
+#include <linux/i2c/adp5588.h>
+
+ /* Configuration Register1 */
+#define AUTO_INC (1 << 7)
+#define GPIEM_CFG (1 << 6)
+#define OVR_FLOW_M (1 << 5)
+#define INT_CFG (1 << 4)
+#define OVR_FLOW_IEN (1 << 3)
+#define K_LCK_IM (1 << 2)
+#define GPI_IEN (1 << 1)
+#define KE_IEN (1 << 0)
+
+/* Interrupt Status Register */
+#define CMP2_INT (1 << 5)
+#define CMP1_INT (1 << 4)
+#define OVR_FLOW_INT (1 << 3)
+#define K_LCK_INT (1 << 2)
+#define GPI_INT (1 << 1)
+#define KE_INT (1 << 0)
+
+/* Key Lock and Event Counter Register */
+#define K_LCK_EN (1 << 6)
+#define LCK21 0x30
+#define KEC 0xF
+
+/* Key Event Register xy */
+#define KEY_EV_PRESSED (1 << 7)
+#define KEY_EV_MASK (0x7F)
+
+#define KP_SEL(x) (0xFFFF >> (16 - (x))) /* 2^x-1; x parenthesized so expressions expand safely */
+
+#define KEYP_MAX_EVENT 10
+
+/*
+ * Early pre 4.0 Silicon required to delay readout by at least 25ms,
+ * since the Event Counter Register updated 25ms after the interrupt
+ * asserted.
+ */
+#define WA_DELAYED_READOUT_REVID(rev) ((rev) < 4)
+
+/* Per-device driver state, allocated in adp5588_probe(). */
+struct adp5588_kpad {
+ /* I2C client used for all register accesses */
+ struct i2c_client *client;
+ /* input device that key events are reported through */
+ struct input_dev *input;
+ /* deferred readout of the key event registers, runs adp5588_work() */
+ struct delayed_work work;
+ /* delay in jiffies before the readout; 30 ms for REVID < 4, else 0 */
+ unsigned long delay;
+ /* keycode table copied from platform data; indexed by key number - 1 */
+ unsigned short keycode[ADP5588_KEYMAPSIZE];
+};
+
+/*
+ * Read one byte-wide register over SMBus.  Returns the value handed back by
+ * i2c_smbus_read_byte_data(); a negative result is logged before being
+ * returned unchanged.
+ */
+static int adp5588_read(struct i2c_client *client, u8 reg)
+{
+	int val = i2c_smbus_read_byte_data(client, reg);
+
+	if (val >= 0)
+		return val;
+
+	dev_err(&client->dev, "Read Error\n");
+	return val;
+}
+
+/*
+ * Write one byte-wide register over SMBus and pass the result of
+ * i2c_smbus_write_byte_data() straight back to the caller.
+ */
+static int adp5588_write(struct i2c_client *client, u8 reg, u8 val)
+{
+	int rc = i2c_smbus_write_byte_data(client, reg, val);
+
+	return rc;
+}
+
+/*
+ * Deferred interrupt work: read INT_STAT, report every queued key event to
+ * the input layer and acknowledge the interrupt by writing the status back
+ * (the register is write-1-to-clear).
+ *
+ * Failed I2C reads are never interpreted as register contents, and an event
+ * whose key number does not fit the keycode table is dropped instead of
+ * indexing outside the table.
+ */
+static void adp5588_work(struct work_struct *work)
+{
+	struct adp5588_kpad *kpad = container_of(work,
+					struct adp5588_kpad, work.work);
+	struct i2c_client *client = kpad->client;
+	int i, key, keynum, status, ev_cnt;
+
+	status = adp5588_read(client, INT_STAT);
+	if (status < 0)
+		return;	/* nothing valid to decode or to acknowledge */
+
+	if (status & OVR_FLOW_INT)	/* Unlikely and should never happen */
+		dev_err(&client->dev, "Event Overflow Error\n");
+
+	if (status & KE_INT) {
+		ev_cnt = adp5588_read(client, KEY_LCK_EC_STAT);
+		/* a negative errno must not be masked into an event count */
+		ev_cnt = ev_cnt < 0 ? 0 : ev_cnt & KEC;
+
+		if (ev_cnt) {
+			for (i = 0; i < ev_cnt; i++) {
+				key = adp5588_read(client, Key_EVENTA + i);
+				if (key < 0)
+					continue;
+
+				/* key numbers are 1-based; 0 or too large has no keycode slot */
+				keynum = key & KEY_EV_MASK;
+				if (keynum < 1 || keynum > ADP5588_KEYMAPSIZE)
+					continue;
+
+				input_report_key(kpad->input,
+						 kpad->keycode[keynum - 1],
+						 key & KEY_EV_PRESSED);
+			}
+			input_sync(kpad->input);
+		}
+	}
+	adp5588_write(client, INT_STAT, status); /* Status is W1C */
+}
+
+/*
+ * Interrupt handler.  It does no I2C traffic itself: the event FIFO
+ * registers are read from keventd context by adp5588_work(), which is
+ * queued here.  For REVID < 4 the readout is scheduled at least 25ms after
+ * the notification (kpad->delay).
+ */
+static irqreturn_t adp5588_irq(int irq, void *handle)
+{
+	struct adp5588_kpad *kp = handle;
+	struct delayed_work *readout = &kp->work;
+
+	schedule_delayed_work(readout, kp->delay);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Program the chip from platform data: select which rows/columns belong to
+ * the keypad, optionally configure the key lock, drain the event registers,
+ * clear all pending interrupt status bits and enable the key-event and
+ * overflow interrupts.
+ *
+ * Returns 0 on success or the negative error of the first failing I2C
+ * transfer.  Results are not OR-ed together: doing so would mix in
+ * successful read data and could merge two errnos into an unrelated one.
+ */
+static int __devinit adp5588_setup(struct i2c_client *client)
+{
+	struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
+	int i, ret;
+
+	ret = adp5588_write(client, KP_GPIO1, KP_SEL(pdata->rows));
+	if (ret < 0)
+		goto err;
+
+	ret = adp5588_write(client, KP_GPIO2, KP_SEL(pdata->cols) & 0xFF);
+	if (ret < 0)
+		goto err;
+
+	ret = adp5588_write(client, KP_GPIO3, KP_SEL(pdata->cols) >> 8);
+	if (ret < 0)
+		goto err;
+
+	if (pdata->en_keylock) {
+		ret = adp5588_write(client, UNLOCK1, pdata->unlock_key1);
+		if (ret < 0)
+			goto err;
+
+		ret = adp5588_write(client, UNLOCK2, pdata->unlock_key2);
+		if (ret < 0)
+			goto err;
+
+		ret = adp5588_write(client, KEY_LCK_EC_STAT, K_LCK_EN);
+		if (ret < 0)
+			goto err;
+	}
+
+	/* drain stale events; the values read are deliberately discarded */
+	for (i = 0; i < KEYP_MAX_EVENT; i++) {
+		ret = adp5588_read(client, Key_EVENTA);
+		if (ret < 0)
+			goto err;
+	}
+
+	ret = adp5588_write(client, INT_STAT, CMP2_INT | CMP1_INT |
+					OVR_FLOW_INT | K_LCK_INT |
+					GPI_INT | KE_INT); /* Status is W1C */
+	if (ret < 0)
+		goto err;
+
+	ret = adp5588_write(client, CFG, INT_CFG | OVR_FLOW_IEN | KE_IEN);
+	if (ret < 0)
+		goto err;
+
+	return 0;
+
+ err:
+	dev_err(&client->dev, "Write Error\n");
+	return ret;
+}
+
+/*
+ * Bind to an ADP5588: validate the adapter capabilities and platform data,
+ * allocate the driver state and an input device, copy the keymap, register
+ * the input device, claim the IRQ and finally program the chip through
+ * adp5588_setup().
+ *
+ * Returns 0 on success or a negative errno.  On failure everything acquired
+ * so far is released, including delayed work that the already-installed
+ * interrupt handler may have queued.
+ */
+static int __devinit adp5588_probe(struct i2c_client *client,
+				    const struct i2c_device_id *id)
+{
+	struct adp5588_kpad *kpad;
+	struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
+	struct input_dev *input;
+	unsigned int revid;
+	int ret, i;
+	int error;
+
+	if (!i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_BYTE_DATA)) {
+		dev_err(&client->dev, "SMBUS Byte Data not Supported\n");
+		return -EIO;
+	}
+
+	if (!pdata) {
+		dev_err(&client->dev, "no platform data?\n");
+		return -EINVAL;
+	}
+
+	if (!pdata->rows || !pdata->cols || !pdata->keymap) {
+		dev_err(&client->dev, "no rows, cols or keymap from pdata\n");
+		return -EINVAL;
+	}
+
+	/* the memcpy() below relies on the map matching kpad->keycode[] */
+	if (pdata->keymapsize != ADP5588_KEYMAPSIZE) {
+		dev_err(&client->dev, "invalid keymapsize\n");
+		return -EINVAL;
+	}
+
+	if (!client->irq) {
+		dev_err(&client->dev, "no IRQ?\n");
+		return -EINVAL;
+	}
+
+	kpad = kzalloc(sizeof(*kpad), GFP_KERNEL);
+	input = input_allocate_device();
+	if (!kpad || !input) {
+		error = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	kpad->client = client;
+	kpad->input = input;
+	INIT_DELAYED_WORK(&kpad->work, adp5588_work);
+
+	ret = adp5588_read(client, DEV_ID);
+	if (ret < 0) {
+		error = ret;
+		goto err_free_mem;
+	}
+
+	/* old silicon needs a delayed readout; otherwise delay stays 0 */
+	revid = (u8) ret & ADP5588_DEVICE_ID_MASK;
+	if (WA_DELAYED_READOUT_REVID(revid))
+		kpad->delay = msecs_to_jiffies(30);
+
+	input->name = client->name;
+	input->phys = "adp5588-keys/input0";
+	input->dev.parent = &client->dev;
+
+	input_set_drvdata(input, kpad);
+
+	input->id.bustype = BUS_I2C;
+	input->id.vendor = 0x0001;
+	input->id.product = 0x0001;
+	input->id.version = revid;
+
+	input->keycodesize = sizeof(kpad->keycode[0]);
+	input->keycodemax = pdata->keymapsize;
+	input->keycode = kpad->keycode;
+
+	memcpy(kpad->keycode, pdata->keymap,
+		pdata->keymapsize * input->keycodesize);
+
+	/* setup input device */
+	__set_bit(EV_KEY, input->evbit);
+
+	if (pdata->repeat)
+		__set_bit(EV_REP, input->evbit);
+
+	for (i = 0; i < input->keycodemax; i++)
+		__set_bit(kpad->keycode[i] & KEY_MAX, input->keybit);
+	__clear_bit(KEY_RESERVED, input->keybit);
+
+	error = input_register_device(input);
+	if (error) {
+		dev_err(&client->dev, "unable to register input device\n");
+		goto err_free_mem;
+	}
+
+	error = request_irq(client->irq, adp5588_irq,
+			    IRQF_TRIGGER_FALLING | IRQF_DISABLED,
+			    client->dev.driver->name, kpad);
+	if (error) {
+		dev_err(&client->dev, "irq %d busy?\n", client->irq);
+		goto err_unreg_dev;
+	}
+
+	error = adp5588_setup(client);
+	if (error)
+		goto err_free_irq;
+
+	device_init_wakeup(&client->dev, 1);
+	i2c_set_clientdata(client, kpad);
+
+	dev_info(&client->dev, "Rev.%d keypad, irq %d\n", revid, client->irq);
+	return 0;
+
+ err_free_irq:
+	free_irq(client->irq, kpad);
+	/*
+	 * The handler may already have queued the readout work; flush it
+	 * before kpad is freed, as adp5588_remove() does.
+	 */
+	cancel_delayed_work_sync(&kpad->work);
+ err_unreg_dev:
+	input_unregister_device(input);
+	/* once unregistered the device must not also go through input_free_device() */
+	input = NULL;
+ err_free_mem:
+	input_free_device(input);
+	kfree(kpad);
+
+	return error;
+}
+
+/*
+ * Unbind from the device and release everything adp5588_probe() set up.
+ * The order matters: the IRQ is freed before the delayed work is cancelled
+ * so that no new work can be queued, and the work is cancelled before the
+ * state it dereferences is freed.  Always returns 0.
+ */
+static int __devexit adp5588_remove(struct i2c_client *client)
+{
+ struct adp5588_kpad *kpad = i2c_get_clientdata(client);
+
+ /* clear CFG, i.e. the interrupt enables written by adp5588_setup() */
+ adp5588_write(client, CFG, 0);
+ free_irq(client->irq, kpad);
+ cancel_delayed_work_sync(&kpad->work);
+ input_unregister_device(kpad->input);
+ i2c_set_clientdata(client, NULL);
+ kfree(kpad);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+ * Suspend callback: disable the keypad interrupt, flush any pending readout
+ * work and, if the device may wake the system, arm the IRQ as a wakeup
+ * source.  Always returns 0.
+ */
+static int adp5588_suspend(struct device *dev)
+{
+	struct adp5588_kpad *kp = dev_get_drvdata(dev);
+	struct i2c_client *i2c = kp->client;
+
+	disable_irq(i2c->irq);
+	cancel_delayed_work_sync(&kp->work);
+
+	if (!device_may_wakeup(&i2c->dev))
+		return 0;
+
+	enable_irq_wake(i2c->irq);
+	return 0;
+}
+
+/*
+ * Resume callback, the mirror image of adp5588_suspend(): disarm the wakeup
+ * source if it was eligible, then re-enable the keypad interrupt.  Always
+ * returns 0.
+ */
+static int adp5588_resume(struct device *dev)
+{
+	struct adp5588_kpad *kp = dev_get_drvdata(dev);
+	struct i2c_client *i2c = kp->client;
+	const int irq = i2c->irq;
+
+	if (device_may_wakeup(&i2c->dev))
+		disable_irq_wake(irq);
+
+	enable_irq(irq);
+
+	return 0;
+}
+
+/* PM callbacks; never modified at run time, hence const. */
+static const struct dev_pm_ops adp5588_dev_pm_ops = {
+	.suspend = adp5588_suspend,
+	.resume = adp5588_resume,
+};
+#endif
+
+/*
+ * I2C device IDs handled by this driver: a single entry named after the
+ * module (KBUILD_MODNAME), followed by the empty terminating entry.
+ */
+static const struct i2c_device_id adp5588_id[] = {
+ { KBUILD_MODNAME, 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, adp5588_id);
+
+/*
+ * I2C driver glue.  The PM callbacks are only wired up when CONFIG_PM is
+ * set, matching the conditional definition of adp5588_dev_pm_ops.
+ */
+static struct i2c_driver adp5588_driver = {
+ .driver = {
+ .name = KBUILD_MODNAME,
+#ifdef CONFIG_PM
+ .pm = &adp5588_dev_pm_ops,
+#endif
+ },
+ .probe = adp5588_probe,
+ .remove = __devexit_p(adp5588_remove),
+ .id_table = adp5588_id,
+};
+
+/* Module entry point: register the I2C driver and return the result. */
+static int __init adp5588_init(void)
+{
+	int rc = i2c_add_driver(&adp5588_driver);
+
+	return rc;
+}
+module_init(adp5588_init);
+
+/* Module exit point: unregister the I2C driver added by adp5588_init(). */
+static void __exit adp5588_exit(void)
+{
+ i2c_del_driver(&adp5588_driver);
+}
+module_exit(adp5588_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("ADP5588 Keypad driver");
+MODULE_ALIAS("platform:adp5588-keys");
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index adb09e2ba39..4709e15af60 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -773,23 +773,6 @@ static int atkbd_select_set(struct atkbd *atkbd, int target_set, int allow_extra
static int atkbd_activate(struct atkbd *atkbd)
{
struct ps2dev *ps2dev = &atkbd->ps2dev;
- unsigned char param[1];
-
-/*
- * Set the LEDs to a defined state.
- */
-
- param[0] = 0;
- if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS))
- return -1;
-
-/*
- * Set autorepeat to fastest possible.
- */
-
- param[0] = 0;
- if (ps2_command(ps2dev, param, ATKBD_CMD_SETREP))
- return -1;
/*
* Enable the keyboard to receive keystrokes.
@@ -1158,14 +1141,6 @@ static int atkbd_reconnect(struct serio *serio)
return -1;
atkbd_activate(atkbd);
-
-/*
- * Restore repeat rate and LEDs (that were reset by atkbd_activate)
- * to pre-resume state
- */
- if (!atkbd->softrepeat)
- atkbd_set_repeat_rate(atkbd);
- atkbd_set_leds(atkbd);
}
atkbd_enable(atkbd);
diff --git a/drivers/input/keyboard/max7359_keypad.c b/drivers/input/keyboard/max7359_keypad.c
new file mode 100644
index 00000000000..3b5b948eba3
--- /dev/null
+++ b/drivers/input/keyboard/max7359_keypad.c
@@ -0,0 +1,330 @@
+/*
+ * max7359_keypad.c - MAX7359 Key Switch Controller Driver
+ *
+ * Copyright (C) 2009 Samsung Electronics
+ * Kim Kyuwon <q1.kim@samsung.com>
+ *
+ * Based on pxa27x_keypad.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Datasheet: http://www.maxim-ic.com/quick_view2.cfm/qv_pk/5456
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/input/matrix_keypad.h>
+
+#define MAX7359_MAX_KEY_ROWS 8
+#define MAX7359_MAX_KEY_COLS 8
+#define MAX7359_MAX_KEY_NUM (MAX7359_MAX_KEY_ROWS * MAX7359_MAX_KEY_COLS)
+#define MAX7359_ROW_SHIFT 3
+
+/*
+ * MAX7359 registers
+ */
+#define MAX7359_REG_KEYFIFO 0x00
+#define MAX7359_REG_CONFIG 0x01
+#define MAX7359_REG_DEBOUNCE 0x02
+#define MAX7359_REG_INTERRUPT 0x03
+#define MAX7359_REG_PORTS 0x04
+#define MAX7359_REG_KEYREP 0x05
+#define MAX7359_REG_SLEEP 0x06
+
+/*
+ * Configuration register bits
+ */
+#define MAX7359_CFG_SLEEP (1 << 7)
+#define MAX7359_CFG_INTERRUPT (1 << 5)
+#define MAX7359_CFG_KEY_RELEASE (1 << 3)
+#define MAX7359_CFG_WAKEUP (1 << 1)
+#define MAX7359_CFG_TIMEOUT (1 << 0)
+
+/*
+ * Autosleep register values (ms)
+ */
+#define MAX7359_AUTOSLEEP_8192 0x01
+#define MAX7359_AUTOSLEEP_4096 0x02
+#define MAX7359_AUTOSLEEP_2048 0x03
+#define MAX7359_AUTOSLEEP_1024 0x04
+#define MAX7359_AUTOSLEEP_512 0x05
+#define MAX7359_AUTOSLEEP_256 0x06
+
+/* Per-device driver state for one MAX7359 key switch controller. */
+struct max7359_keypad {
+ /* matrix key code map */
+ unsigned short keycodes[MAX7359_MAX_KEY_NUM];
+
+ struct input_dev *input_dev; /* input device that key events are reported on */
+ struct i2c_client *client; /* I2C client used for all register accesses */
+};
+
+/*
+ * Write @val to register @reg of the chip behind @client.
+ *
+ * Returns whatever i2c_smbus_write_byte_data() returned; a negative result
+ * is logged before it is handed back.
+ */
+static int max7359_write_reg(struct i2c_client *client, u8 reg, u8 val)
+{
+	int status;
+
+	status = i2c_smbus_write_byte_data(client, reg, val);
+	if (status >= 0)
+		return status;
+
+	dev_err(&client->dev, "%s: reg 0x%x, val 0x%x, err %d\n",
+		__func__, reg, val, status);
+	return status;
+}
+
+/*
+ * Read register @reg of the chip behind @client.
+ *
+ * Returns whatever i2c_smbus_read_byte_data() returned; a negative result
+ * is logged before it is handed back.
+ */
+static int max7359_read_reg(struct i2c_client *client, int reg)
+{
+	int value;
+
+	value = i2c_smbus_read_byte_data(client, reg);
+	if (value >= 0)
+		return value;
+
+	dev_err(&client->dev, "%s: reg 0x%x, err %d\n",
+		__func__, reg, value);
+	return value;
+}
+
+/*
+ * Fill keypad->keycodes[] from the board-supplied keymap and advertise every
+ * mapped keycode in the input device's keybit mask.
+ *
+ * Each keymap entry packs row, column and keycode (unpacked with KEY_ROW(),
+ * KEY_COL() and KEY_VAL()).  Entries whose row or column lies outside the
+ * 8x8 matrix are skipped with a warning: their scancode would index past
+ * the end of keycodes[].
+ */
+static void max7359_build_keycode(struct max7359_keypad *keypad,
+				const struct matrix_keymap_data *keymap_data)
+{
+	struct input_dev *input_dev = keypad->input_dev;
+	unsigned int i;
+
+	for (i = 0; i < keymap_data->keymap_size; i++) {
+		unsigned int key = keymap_data->keymap[i];
+		unsigned int row = KEY_ROW(key);
+		unsigned int col = KEY_COL(key);
+		unsigned short keycode = KEY_VAL(key);
+		unsigned int scancode;
+
+		if (row >= MAX7359_MAX_KEY_ROWS ||
+		    col >= MAX7359_MAX_KEY_COLS) {
+			dev_warn(&keypad->client->dev,
+				 "ignoring out-of-range key [%u:%u]\n",
+				 row, col);
+			continue;
+		}
+
+		scancode = MATRIX_SCAN_CODE(row, col, MAX7359_ROW_SHIFT);
+		keypad->keycodes[scancode] = keycode;
+		__set_bit(keycode, input_dev->keybit);
+	}
+
+	/* KEY_RESERVED must never be advertised as a supported key */
+	__clear_bit(KEY_RESERVED, input_dev->keybit);
+}
+
+/*
+ * Threaded IRQ handler: reads one entry from the keys FIFO and reports it.
+ *
+ * Runs in an IRQ thread -- the register read goes over I2C and can (and
+ * will!) sleep.  In the FIFO value, bits 0-2 are taken as the row, bits 3-5
+ * as the column, and bit 6 marks a key release.
+ */
+static irqreturn_t max7359_interrupt(int irq, void *dev_id)
+{
+	struct max7359_keypad *keypad = dev_id;
+	struct input_dev *input_dev = keypad->input_dev;
+	int val, row, col, release, code;
+
+	val = max7359_read_reg(keypad->client, MAX7359_REG_KEYFIFO);
+	if (val < 0) {
+		/*
+		 * The read failed (max7359_read_reg() already logged it).
+		 * Decoding a negative error code would report a bogus key
+		 * event, so drop this interrupt instead.
+		 */
+		return IRQ_HANDLED;
+	}
+
+	row = val & 0x7;
+	col = (val >> 3) & 0x7;
+	release = val & 0x40;
+
+	code = MATRIX_SCAN_CODE(row, col, MAX7359_ROW_SHIFT);
+
+	dev_dbg(&keypad->client->dev,
+		"key[%d:%d] %s\n", row, col, release ? "release" : "press");
+
+	/* report the raw scancode first, then the mapped key state */
+	input_event(input_dev, EV_MSC, MSC_SCAN, code);
+	input_report_key(input_dev, keypad->keycodes[code], !release);
+	input_sync(input_dev);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Put the MAX7359 into its "deep sleep" configuration by programming the
+ * autosleep register with the 8192 ms setting.  Per the original author's
+ * note, any key press returns the chip to normal operating mode.
+ */
+static inline void max7359_fall_deepsleep(struct i2c_client *client)
+{
+	const u8 autosleep = MAX7359_AUTOSLEEP_8192;
+
+	max7359_write_reg(client, MAX7359_REG_SLEEP, autosleep);
+}
+
+/*
+ * Put the MAX7359 into its "catnap" configuration by programming the
+ * autosleep register with the 256 ms setting.
+ */
+static inline void max7359_take_catnap(struct i2c_client *client)
+{
+	const u8 autosleep = MAX7359_AUTOSLEEP_256;
+
+	max7359_write_reg(client, MAX7359_REG_SLEEP, autosleep);
+}
+
+/*
+ * input_dev->open callback: switches the chip to the short (256 ms)
+ * autosleep setting.  Always returns 0.
+ */
+static int max7359_open(struct input_dev *dev)
+{
+	struct max7359_keypad *kp = input_get_drvdata(dev);
+	struct i2c_client *i2c = kp->client;
+
+	max7359_take_catnap(i2c);
+	return 0;
+}
+
+/*
+ * input_dev->close callback: switches the chip back to the long (8192 ms)
+ * autosleep setting.
+ */
+static void max7359_close(struct input_dev *dev)
+{
+	struct max7359_keypad *kp = input_get_drvdata(dev);
+	struct i2c_client *i2c = kp->client;
+
+	max7359_fall_deepsleep(i2c);
+}
+
+/*
+ * One-time chip setup done at the end of probe: configuration, debounce and
+ * interrupt registers are programmed, then the long autosleep setting is
+ * selected.  Write errors are logged by max7359_write_reg() and otherwise
+ * ignored here.
+ */
+static void max7359_initialize(struct i2c_client *client)
+{
+	/*
+	 * Irq clears after host read, key release reporting enabled,
+	 * key press wakeup enabled.
+	 */
+	const u8 config = MAX7359_CFG_INTERRUPT |
+			  MAX7359_CFG_KEY_RELEASE |
+			  MAX7359_CFG_WAKEUP;
+
+	max7359_write_reg(client, MAX7359_REG_CONFIG, config);
+
+	/* Full key-scan functionality */
+	max7359_write_reg(client, MAX7359_REG_DEBOUNCE, 0x1F);
+
+	/* nINT asserts on every debounce cycle */
+	max7359_write_reg(client, MAX7359_REG_INTERRUPT, 0x01);
+
+	max7359_fall_deepsleep(client);
+}
+
+/*
+ * Bind to a MAX7359: verify the chip answers, allocate driver state and the
+ * input device, build the keycode table from the board keymap, claim the
+ * interrupt, register the input device and finally program the chip.
+ *
+ * Returns 0 on success or a negative errno.  -EINVAL is returned when the
+ * board supplies no IRQ or no keymap, -ENODEV when the chip does not answer
+ * the initial FIFO read.
+ */
+static int __devinit max7359_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	const struct matrix_keymap_data *keymap_data = client->dev.platform_data;
+	struct max7359_keypad *keypad;
+	struct input_dev *input_dev;
+	int ret;
+	int error;
+
+	if (!client->irq) {
+		dev_err(&client->dev, "The irq number should not be zero\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * The keymap comes from board platform data; without it
+	 * max7359_build_keycode() would dereference a NULL pointer.
+	 */
+	if (!keymap_data) {
+		dev_err(&client->dev, "no keymap platform data\n");
+		return -EINVAL;
+	}
+
+	/* Detect MAX7359: The initial Keys FIFO value is '0x3F' */
+	ret = max7359_read_reg(client, MAX7359_REG_KEYFIFO);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed to detect device\n");
+		return -ENODEV;
+	}
+
+	dev_dbg(&client->dev, "keys FIFO is 0x%02x\n", ret);
+
+	/* allocate both objects, then check them together */
+	keypad = kzalloc(sizeof(*keypad), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!keypad || !input_dev) {
+		dev_err(&client->dev, "failed to allocate memory\n");
+		error = -ENOMEM;
+		goto failed_free_mem;
+	}
+
+	keypad->client = client;
+	keypad->input_dev = input_dev;
+
+	input_dev->name = client->name;
+	input_dev->id.bustype = BUS_I2C;
+	input_dev->open = max7359_open;
+	input_dev->close = max7359_close;
+	input_dev->dev.parent = &client->dev;
+
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
+	input_dev->keycodesize = sizeof(keypad->keycodes[0]);
+	input_dev->keycodemax = ARRAY_SIZE(keypad->keycodes);
+	input_dev->keycode = keypad->keycodes;
+
+	input_set_capability(input_dev, EV_MSC, MSC_SCAN);
+	input_set_drvdata(input_dev, keypad);
+
+	max7359_build_keycode(keypad, keymap_data);
+
+	/* no hard-IRQ handler: all work happens in the IRQ thread */
+	error = request_threaded_irq(client->irq, NULL, max7359_interrupt,
+				     IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				     client->name, keypad);
+	if (error) {
+		dev_err(&client->dev, "failed to register interrupt\n");
+		goto failed_free_mem;
+	}
+
+	/* Register the input device */
+	error = input_register_device(input_dev);
+	if (error) {
+		dev_err(&client->dev, "failed to register input device\n");
+		goto failed_free_irq;
+	}
+
+	/* Initialize MAX7359 */
+	max7359_initialize(client);
+
+	i2c_set_clientdata(client, keypad);
+	device_init_wakeup(&client->dev, 1);
+
+	return 0;
+
+failed_free_irq:
+	free_irq(client->irq, keypad);
+failed_free_mem:
+	input_free_device(input_dev);
+	kfree(keypad);
+	return error;
+}
+
+/*
+ * Unbind from the device: release the interrupt, unregister the input
+ * device, clear the client data pointer and free the driver state.
+ * Always returns 0.
+ */
+static int __devexit max7359_remove(struct i2c_client *client)
+{
+	struct max7359_keypad *kp = i2c_get_clientdata(client);
+	struct input_dev *input = kp->input_dev;
+
+	free_irq(client->irq, kp);
+	input_unregister_device(input);
+	i2c_set_clientdata(client, NULL);
+	kfree(kp);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+ * Suspend: select the long autosleep setting and, when the device is
+ * allowed to wake the system, arm its interrupt as a wakeup source.
+ */
+static int max7359_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+	max7359_fall_deepsleep(client);
+
+	if (!device_may_wakeup(&client->dev))
+		return 0;
+
+	enable_irq_wake(client->irq);
+	return 0;
+}
+
+/*
+ * Resume: disarm the wakeup interrupt if it was armed, then go back to the
+ * short autosleep setting.
+ */
+static int max7359_resume(struct i2c_client *client)
+{
+	struct device *dev = &client->dev;
+
+	if (device_may_wakeup(dev))
+		disable_irq_wake(client->irq);
+
+	/* Restore the default setting */
+	max7359_take_catnap(client);
+	return 0;
+}
+#else
+#define max7359_suspend NULL
+#define max7359_resume NULL
+#endif
+
+/* I2C device IDs served by this driver. */
+static const struct i2c_device_id max7359_ids[] = {
+	{ .name = "max7359", .driver_data = 0 },
+	{ /* terminating entry */ }
+};
+MODULE_DEVICE_TABLE(i2c, max7359_ids);
+
+/* I2C driver description: bind/unbind and suspend/resume callbacks plus ID table. */
+static struct i2c_driver max7359_i2c_driver = {
+	.id_table	= max7359_ids,
+	.probe		= max7359_probe,
+	.remove		= __devexit_p(max7359_remove),
+	.suspend	= max7359_suspend,
+	.resume		= max7359_resume,
+	.driver		= {
+		.name	= "max7359",
+	},
+};
+
+/* Module entry point: hands the driver to the I2C core and returns its status. */
+static int __init max7359_init(void)
+{
+	int status = i2c_add_driver(&max7359_i2c_driver);
+
+	return status;
+}
+module_init(max7359_init);
+
+/* Module exit point: withdraws the driver from the I2C core. */
+static void __exit max7359_exit(void)
+{
+	struct i2c_driver *drv = &max7359_i2c_driver;
+
+	i2c_del_driver(drv);
+}
+module_exit(max7359_exit);
+
+MODULE_AUTHOR("Kim Kyuwon <q1.kim@samsung.com>");
+MODULE_DESCRIPTION("MAX7359 Key Switch Controller Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/input/keyboard/opencores-kbd.c b/drivers/input/keyboard/opencores-kbd.c
new file mode 100644
index 00000000000..78cccddbf55
--- /dev/null
+++ b/drivers/input/keyboard/opencores-kbd.c
@@ -0,0 +1,180 @@
+/*
+ * OpenCores Keyboard Controller Driver
+ * http://www.opencores.org/project,keyboardcontroller
+ *
+ * Copyright 2007-2009 HV Sistemas S.L.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+/* Driver state for one OpenCores keyboard controller instance. */
+struct opencores_kbd {
+ struct input_dev *input; /* input device that key events are reported on */
+ struct resource *addr_res; /* platform MEM resource; remove() uses it for release_mem_region() */
+ void __iomem *addr; /* ioremap()ed registers; the ISR reads the key byte from here */
+ int irq; /* interrupt requested in probe and freed in remove */
+ unsigned short keycodes[128]; /* keycode table exported through input->keycode */
+};
+
+/*
+ * Interrupt handler: reads one byte from the controller and reports it.
+ *
+ * The low 7 bits of the byte are the scancode; bit 7 set means release,
+ * clear means press.  The scancode is translated through the keycodes[]
+ * table that probe exports via input->keycode, so changes made to that
+ * table take effect.  With the identity table installed by probe the
+ * reported code is the scancode itself.
+ */
+static irqreturn_t opencores_kbd_isr(int irq, void *dev_id)
+{
+	struct opencores_kbd *opencores_kbd = dev_id;
+	struct input_dev *input = opencores_kbd->input;
+	unsigned char c;
+	unsigned int scancode;
+
+	c = readb(opencores_kbd->addr);
+	scancode = c & 0x7f;	/* always < ARRAY_SIZE(keycodes) == 128 */
+
+	input_report_key(input, opencores_kbd->keycodes[scancode],
+			 c & 0x80 ? 0 : 1);
+	input_sync(input);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Bind to an OpenCores keyboard controller platform device.
+ *
+ * Looks up the MEM and IRQ resources, allocates the driver state and input
+ * device, claims and maps the register window, fills the keycode table,
+ * requests the interrupt and registers the input device.  On failure the
+ * labels at the end undo the steps already taken, in reverse order.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __devinit opencores_kbd_probe(struct platform_device *pdev)
+{
+ struct input_dev *input;
+ struct opencores_kbd *opencores_kbd;
+ struct resource *res;
+ int irq, i, error;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(&pdev->dev, "missing board memory resource\n");
+ return -EINVAL;
+ }
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "missing board IRQ resource\n");
+ return -EINVAL;
+ }
+
+ /* allocate both objects, then check them together */
+ opencores_kbd = kzalloc(sizeof(*opencores_kbd), GFP_KERNEL);
+ input = input_allocate_device();
+ if (!opencores_kbd || !input) {
+ dev_err(&pdev->dev, "failed to allocate device structures\n");
+ error = -ENOMEM;
+ goto err_free_mem;
+ }
+
+ /* remember the platform resource: 'res' is overwritten just below and remove() needs it */
+ opencores_kbd->addr_res = res;
+ res = request_mem_region(res->start, resource_size(res), pdev->name);
+ if (!res) {
+ dev_err(&pdev->dev, "failed to request I/O memory\n");
+ error = -EBUSY;
+ goto err_free_mem;
+ }
+
+ opencores_kbd->addr = ioremap(res->start, resource_size(res));
+ if (!opencores_kbd->addr) {
+ dev_err(&pdev->dev, "failed to remap I/O memory\n");
+ error = -ENXIO;
+ goto err_rel_mem;
+ }
+
+ opencores_kbd->input = input;
+ opencores_kbd->irq = irq;
+
+ input->name = pdev->name;
+ input->phys = "opencores-kbd/input0";
+ input->dev.parent = &pdev->dev;
+
+ input_set_drvdata(input, opencores_kbd);
+
+ input->id.bustype = BUS_HOST;
+ input->id.vendor = 0x0001;
+ input->id.product = 0x0001;
+ input->id.version = 0x0100;
+
+ input->keycode = opencores_kbd->keycodes;
+ input->keycodesize = sizeof(opencores_kbd->keycodes[0]);
+ input->keycodemax = ARRAY_SIZE(opencores_kbd->keycodes);
+
+ __set_bit(EV_KEY, input->evbit);
+
+ for (i = 0; i < ARRAY_SIZE(opencores_kbd->keycodes); i++) {
+ /*
+ * OpenCores controller happens to have scancodes match
+ * our KEY_* definitions.
+ */
+ opencores_kbd->keycodes[i] = i;
+ __set_bit(opencores_kbd->keycodes[i], input->keybit);
+ }
+ /* entry 0 of the identity table is KEY_RESERVED; do not advertise it */
+ __clear_bit(KEY_RESERVED, input->keybit);
+
+ error = request_irq(irq, &opencores_kbd_isr,
+ IRQF_TRIGGER_RISING, pdev->name, opencores_kbd);
+ if (error) {
+ dev_err(&pdev->dev, "unable to claim irq %d\n", irq);
+ goto err_unmap_mem;
+ }
+
+ error = input_register_device(input);
+ if (error) {
+ dev_err(&pdev->dev, "unable to register input device\n");
+ goto err_free_irq;
+ }
+
+ platform_set_drvdata(pdev, opencores_kbd);
+
+ return 0;
+
+ /* error unwinding: each label undoes one acquisition and falls through to the next */
+ err_free_irq:
+ free_irq(irq, opencores_kbd);
+ err_unmap_mem:
+ iounmap(opencores_kbd->addr);
+ err_rel_mem:
+ release_mem_region(res->start, resource_size(res));
+ err_free_mem:
+ input_free_device(input);
+ kfree(opencores_kbd);
+
+ return error;
+}
+
+/*
+ * Unbind from the platform device: free the interrupt, unmap and release
+ * the register window, unregister the input device and free the driver
+ * state.  Always returns 0.
+ */
+static int __devexit opencores_kbd_remove(struct platform_device *pdev)
+{
+	struct opencores_kbd *kbd = platform_get_drvdata(pdev);
+	struct resource *region = kbd->addr_res;
+
+	free_irq(kbd->irq, kbd);
+
+	iounmap(kbd->addr);
+	release_mem_region(region->start, resource_size(region));
+	input_unregister_device(kbd->input);
+	kfree(kbd);
+
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+/* Platform driver description; matched against devices named "opencores-kbd". */
+static struct platform_driver opencores_kbd_device_driver = {
+	.driver	= {
+		.name = "opencores-kbd",
+	},
+	.probe	= opencores_kbd_probe,
+	.remove	= __devexit_p(opencores_kbd_remove),
+};
+
+/* Module entry point: registers the platform driver and returns its status. */
+static int __init opencores_kbd_init(void)
+{
+	int status = platform_driver_register(&opencores_kbd_device_driver);
+
+	return status;
+}
+module_init(opencores_kbd_init);
+
+/* Module exit point: unregisters the platform driver. */
+static void __exit opencores_kbd_exit(void)
+{
+	struct platform_driver *drv = &opencores_kbd_device_driver;
+
+	platform_driver_unregister(drv);
+}
+module_exit(opencores_kbd_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Javier Herrero <jherrero@hvsistemas.es>");
+MODULE_DESCRIPTION("Keyboard driver for OpenCores Keyboard Controller");
diff --git a/drivers/input/keyboard/qt2160.c b/drivers/input/keyboard/qt2160.c
new file mode 100644
index 00000000000..191cc51d6cf
--- /dev/null
+++ b/drivers/input/keyboard/qt2160.c
@@ -0,0 +1,397 @@
+/*
+ * qt2160.c - Atmel AT42QT2160 Touch Sense Controller
+ *
+ * Copyright (C) 2009 Raphael Derosso Pereira <raphaelpereira@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+
+#define QT2160_VALID_CHIPID 0x11
+
+#define QT2160_CMD_CHIPID 0
+#define QT2160_CMD_CODEVER 1
+#define QT2160_CMD_GSTAT 2
+#define QT2160_CMD_KEYS3 3
+#define QT2160_CMD_KEYS4 4
+#define QT2160_CMD_SLIDE 5
+#define QT2160_CMD_GPIOS 6
+#define QT2160_CMD_SUBVER 7
+#define QT2160_CMD_CALIBRATE 10
+
+#define QT2160_CYCLE_INTERVAL (2*HZ)
+
+/*
+ * Default keycode for each of the 16 key bits.  The table is only read
+ * (probe copies it into the per-device keycodes[] array), so it is const.
+ */
+static const unsigned char qt2160_key2code[] = {
+	KEY_0, KEY_1, KEY_2, KEY_3,
+	KEY_4, KEY_5, KEY_6, KEY_7,
+	KEY_8, KEY_9, KEY_A, KEY_B,
+	KEY_C, KEY_D, KEY_E, KEY_F,
+};
+
+/* Per-device driver state for one AT42QT2160 controller. */
+struct qt2160_data {
+ struct i2c_client *client; /* I2C client used for all chip accesses */
+ struct input_dev *input; /* input device that key events are reported on */
+ struct delayed_work dwork; /* runs qt2160_worker(): reads the keys and re-arms itself */
+ spinlock_t lock; /* Protects canceling/rescheduling of dwork */
+ unsigned short keycodes[ARRAY_SIZE(qt2160_key2code)]; /* per-device copy of the keycode table */
+ u16 key_matrix; /* key bitmap from the previous read, used to detect changes */
+};
+
+/*
+ * Read @count consecutive registers starting at @inireg into @buffer.
+ *
+ * Returns 0 on success or a negative errno.  A short read on the plain-I2C
+ * path is reported as -EIO rather than handing the (positive) byte count
+ * back to callers that treat any non-zero value as an error code.
+ */
+static int qt2160_read_block(struct i2c_client *client,
+			     u8 inireg, u8 *buffer, unsigned int count)
+{
+	int error, idx = 0;
+
+	/*
+	 * Can't use SMBus block data read. Check for I2C functionality to speed
+	 * things up whenever possible. Otherwise we will be forced to read
+	 * sequentially.
+	 */
+	if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+
+		/* select the first register, then read the whole range at once */
+		error = i2c_smbus_write_byte(client, inireg + idx);
+		if (error) {
+			dev_err(&client->dev,
+				"couldn't send request. Returned %d\n", error);
+			return error;
+		}
+
+		error = i2c_master_recv(client, buffer, count);
+		if (error < 0 || (unsigned int)error != count) {
+			dev_err(&client->dev,
+				"couldn't read registers. Returned %d bytes\n", error);
+			return error < 0 ? error : -EIO;
+		}
+	} else {
+
+		/* one address write plus one byte read per register */
+		while (count--) {
+			int data;
+
+			error = i2c_smbus_write_byte(client, inireg + idx);
+			if (error) {
+				dev_err(&client->dev,
+					"couldn't send request. Returned %d\n", error);
+				return error;
+			}
+
+			data = i2c_smbus_read_byte(client);
+			if (data < 0) {
+				dev_err(&client->dev,
+					"couldn't read register. Returned %d\n", data);
+				return data;
+			}
+
+			buffer[idx++] = data;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Read the current key state from the chip and report every key whose bit
+ * differs from the previously stored bitmap.
+ *
+ * Six registers are read starting at the general status register; the key
+ * bitmap is assembled from the second (low byte) and third (high byte) of
+ * them.  Returns 0 on success or the error from qt2160_read_block().
+ */
+static int qt2160_get_key_matrix(struct qt2160_data *qt2160)
+{
+	struct i2c_client *client = qt2160->client;
+	struct input_dev *input = qt2160->input;
+	u8 regs[6];
+	u16 previous, latest;
+	int err, bit;
+
+	dev_dbg(&client->dev, "requesting keys...\n");
+
+	/* General Status Register through GPIOs register, in one go */
+	err = qt2160_read_block(client, QT2160_CMD_GSTAT, regs, 6);
+	if (err) {
+		dev_err(&client->dev,
+			"could not perform chip read.\n");
+		return err;
+	}
+
+	previous = qt2160->key_matrix;
+	latest = (regs[2] << 8) | regs[1];
+	qt2160->key_matrix = latest;
+
+	for (bit = 0; bit < 16; bit++) {
+		int mask = 0x01 << bit;
+		int keyval = latest & mask;
+
+		/* unchanged keys produce no event */
+		if ((previous & mask) == keyval)
+			continue;
+
+		input_report_key(input, qt2160->keycodes[bit], keyval);
+		dev_dbg(&client->dev, "key %d %s\n",
+			bit, keyval ? "pressed" : "released");
+	}
+
+	input_sync(input);
+
+	return 0;
+}
+
+/*
+ * Interrupt handler: the chip is read from the delayed work, so this only
+ * cancels any pending (delayed) run and queues the work with zero delay.
+ * The lock keeps the cancel/schedule pair together.
+ */
+static irqreturn_t qt2160_irq(int irq, void *_qt2160)
+{
+	struct qt2160_data *chip = _qt2160;
+	struct delayed_work *dwork = &chip->dwork;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&chip->lock, irqflags);
+	__cancel_delayed_work(dwork);
+	schedule_delayed_work(dwork, 0);
+	spin_unlock_irqrestore(&chip->lock, irqflags);
+
+	return IRQ_HANDLED;
+}
+
+/* Queue the next periodic read, QT2160_CYCLE_INTERVAL from now, under the lock. */
+static void qt2160_schedule_read(struct qt2160_data *qt2160)
+{
+	struct delayed_work *dwork = &qt2160->dwork;
+
+	spin_lock_irq(&qt2160->lock);
+	schedule_delayed_work(dwork, QT2160_CYCLE_INTERVAL);
+	spin_unlock_irq(&qt2160->lock);
+}
+
+/*
+ * Delayed-work handler: reads and reports the key state, then re-arms
+ * itself so the chip is polled periodically even without interrupts.
+ */
+static void qt2160_worker(struct work_struct *work)
+{
+	struct qt2160_data *chip;
+
+	chip = container_of(work, struct qt2160_data, dwork.work);
+
+	dev_dbg(&chip->client->dev, "worker\n");
+
+	qt2160_get_key_matrix(chip);
+
+	/* Avoid device lock up by checking every so often */
+	qt2160_schedule_read(chip);
+}
+
+/*
+ * Read a single register: send the register number, then read one byte.
+ * Returns the byte value (>= 0) on success; otherwise the failing call's
+ * result is logged and returned.
+ */
+static int __devinit qt2160_read(struct i2c_client *client, u8 reg)
+{
+	int status;
+	int value;
+
+	status = i2c_smbus_write_byte(client, reg);
+	if (status) {
+		dev_err(&client->dev,
+			"couldn't send request. Returned %d\n", status);
+		return status;
+	}
+
+	value = i2c_smbus_read_byte(client);
+	if (value < 0)
+		dev_err(&client->dev,
+			"couldn't read register. Returned %d\n", value);
+
+	return value;
+}
+
+/*
+ * Write @data to register @reg.
+ *
+ * The register number and the value are sent in one SMBus "write byte
+ * data" transaction.  Sending them as two separate one-byte writes (as this
+ * function used to) issues two independent transactions, and the second
+ * byte is then taken as a new register number instead of as data.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __devinit qt2160_write(struct i2c_client *client, u8 reg, u8 data)
+{
+	int error;
+
+	error = i2c_smbus_write_byte_data(client, reg, data);
+	if (error < 0)
+		dev_err(&client->dev,
+			"couldn't write data. Returned %d\n", error);
+
+	return error;
+}
+
+
+/*
+ * Check that the device is an AT42QT2160 and log its firmware version.
+ * Returns true when the chip ID matches and both version registers could
+ * be read, false otherwise.
+ */
+static bool __devinit qt2160_identify(struct i2c_client *client)
+{
+	struct device *dev = &client->dev;
+	int chip_id, version, revision;
+
+	/* Read Chip ID to check if chip is valid */
+	chip_id = qt2160_read(client, QT2160_CMD_CHIPID);
+	if (chip_id != QT2160_VALID_CHIPID) {
+		dev_err(dev, "ID %d not supported\n", chip_id);
+		return false;
+	}
+
+	/* Read chip firmware version */
+	version = qt2160_read(client, QT2160_CMD_CODEVER);
+	if (version < 0) {
+		dev_err(dev, "could not get firmware version\n");
+		return false;
+	}
+
+	/* Read chip firmware revision */
+	revision = qt2160_read(client, QT2160_CMD_SUBVER);
+	if (revision < 0) {
+		dev_err(dev, "could not get firmware revision\n");
+		return false;
+	}
+
+	/* version byte: high nibble is the major, low nibble the minor number */
+	dev_info(dev, "AT42QT2160 firmware version %d.%d.%d\n",
+		 version >> 4, version & 0xf, revision);
+
+	return true;
+}
+
+/*
+ * Bind to an AT42QT2160: check adapter capabilities, identify the chip,
+ * allocate driver state and the input device, calibrate the chip, request
+ * the (optional) interrupt, register the input device and start the
+ * periodic read.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __devinit qt2160_probe(struct i2c_client *client,
+				  const struct i2c_device_id *id)
+{
+	struct qt2160_data *qt2160;
+	struct input_dev *input;
+	int i;
+	int error;
+
+	/* Check functionality */
+	error = i2c_check_functionality(client->adapter,
+			I2C_FUNC_SMBUS_BYTE);
+	if (!error) {
+		dev_err(&client->dev, "%s adapter not supported\n",
+				dev_driver_string(&client->adapter->dev));
+		return -ENODEV;
+	}
+
+	if (!qt2160_identify(client))
+		return -ENODEV;
+
+	/* Chip is valid and active. Allocate structure */
+	qt2160 = kzalloc(sizeof(*qt2160), GFP_KERNEL);
+	input = input_allocate_device();
+	if (!qt2160 || !input) {
+		dev_err(&client->dev, "insufficient memory\n");
+		error = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	qt2160->client = client;
+	qt2160->input = input;
+	INIT_DELAYED_WORK(&qt2160->dwork, qt2160_worker);
+	spin_lock_init(&qt2160->lock);
+
+	input->name = "AT42QT2160 Touch Sense Keyboard";
+	input->id.bustype = BUS_I2C;
+
+	input->keycode = qt2160->keycodes;
+	input->keycodesize = sizeof(qt2160->keycodes[0]);
+	input->keycodemax = ARRAY_SIZE(qt2160_key2code);
+
+	__set_bit(EV_KEY, input->evbit);
+	__clear_bit(EV_REP, input->evbit);
+	for (i = 0; i < ARRAY_SIZE(qt2160_key2code); i++) {
+		qt2160->keycodes[i] = qt2160_key2code[i];
+		__set_bit(qt2160_key2code[i], input->keybit);
+	}
+	__clear_bit(KEY_RESERVED, input->keybit);
+
+	/* Calibrate device */
+	error = qt2160_write(client, QT2160_CMD_CALIBRATE, 1);
+	if (error) {
+		dev_err(&client->dev, "failed to calibrate device\n");
+		goto err_free_mem;
+	}
+
+	/* the interrupt is optional; without it only the periodic read runs */
+	if (client->irq) {
+		error = request_irq(client->irq, qt2160_irq,
+				    IRQF_TRIGGER_FALLING, "qt2160", qt2160);
+		if (error) {
+			dev_err(&client->dev,
+				"failed to allocate irq %d\n", client->irq);
+			goto err_free_mem;
+		}
+	}
+
+	error = input_register_device(qt2160->input);
+	if (error) {
+		dev_err(&client->dev,
+			"Failed to register input device\n");
+		goto err_free_irq;
+	}
+
+	i2c_set_clientdata(client, qt2160);
+	qt2160_schedule_read(qt2160);
+
+	return 0;
+
+err_free_irq:
+	if (client->irq)
+		free_irq(client->irq, qt2160);
+	/*
+	 * The interrupt handler may already have queued the work between
+	 * request_irq() and this point; make sure it is neither pending nor
+	 * running before the structure it lives in is freed.
+	 */
+	cancel_delayed_work_sync(&qt2160->dwork);
+err_free_mem:
+	input_free_device(input);
+	kfree(qt2160);
+	return error;
+}
+
+/*
+ * Unbind from the device: free the interrupt (if one was used), stop the
+ * delayed work, unregister the input device and free the driver state.
+ * Always returns 0.
+ */
+static int __devexit qt2160_remove(struct i2c_client *client)
+{
+	struct qt2160_data *chip = i2c_get_clientdata(client);
+	struct input_dev *input = chip->input;
+
+	/* Release IRQ so no queue will be scheduled */
+	if (client->irq)
+		free_irq(client->irq, chip);
+
+	cancel_delayed_work_sync(&chip->dwork);
+
+	input_unregister_device(input);
+	kfree(chip);
+
+	i2c_set_clientdata(client, NULL);
+	return 0;
+}
+
+/*
+ * I2C device IDs served by this driver.  The table is never modified, so it
+ * is declared const like the ID tables of the other keypad drivers.
+ */
+static const struct i2c_device_id qt2160_idtable[] = {
+	{ "qt2160", 0, },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, qt2160_idtable);
+
+/* I2C driver description: bind/unbind callbacks plus the device ID table. */
+static struct i2c_driver qt2160_driver = {
+	.probe		= qt2160_probe,
+	.remove		= __devexit_p(qt2160_remove),
+	.id_table	= qt2160_idtable,
+
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "qt2160",
+	},
+};
+
+/* Module entry point: hands the driver to the I2C core and returns its status. */
+static int __init qt2160_init(void)
+{
+	int status = i2c_add_driver(&qt2160_driver);
+
+	return status;
+}
+module_init(qt2160_init);
+
+/* Module exit point: withdraws the driver from the I2C core. */
+static void __exit qt2160_cleanup(void)
+{
+	struct i2c_driver *drv = &qt2160_driver;
+
+	i2c_del_driver(drv);
+}
+module_exit(qt2160_cleanup);
+
+MODULE_AUTHOR("Raphael Derosso Pereira <raphaelpereira@gmail.com>");
+MODULE_DESCRIPTION("Driver for AT42QT2160 Touch Sensor");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 76d6751f89a..02f4f8f1db6 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -225,6 +225,7 @@ config INPUT_SGI_BTNS
config INPUT_WINBOND_CIR
tristate "Winbond IR remote control"
depends on X86 && PNP
+ select NEW_LEDS
select LEDS_CLASS
select BITREVERSE
help
diff --git a/drivers/input/misc/dm355evm_keys.c b/drivers/input/misc/dm355evm_keys.c
index 0918acae584..f2b67dc81d8 100644
--- a/drivers/input/misc/dm355evm_keys.c
+++ b/drivers/input/misc/dm355evm_keys.c
@@ -96,7 +96,13 @@ static struct {
{ 0x3169, KEY_PAUSE, },
};
-/* runs in an IRQ thread -- can (and will!) sleep */
+/*
+ * Because we communicate with the MSP430 using I2C, and all I2C calls
+ * in Linux sleep, we use a threaded IRQ handler. The IRQ itself is
+ * active low, but we go through the GPIO controller so we can trigger
+ * on falling edges and not worry about enabling/disabling the IRQ in
+ * the keypress handling path.
+ */
static irqreturn_t dm355evm_keys_irq(int irq, void *_keys)
{
struct dm355evm_keys *keys = _keys;
@@ -171,18 +177,6 @@ static irqreturn_t dm355evm_keys_irq(int irq, void *_keys)
return IRQ_HANDLED;
}
-/*
- * Because we communicate with the MSP430 using I2C, and all I2C calls
- * in Linux sleep, we use a threaded IRQ handler. The IRQ itself is
- * active low, but we go through the GPIO controller so we can trigger
- * on falling edges and not worry about enabling/disabling the IRQ in
- * the keypress handling path.
- */
-static irqreturn_t dm355evm_keys_hardirq(int irq, void *_keys)
-{
- return IRQ_WAKE_THREAD;
-}
-
static int dm355evm_setkeycode(struct input_dev *dev, int index, int keycode)
{
u16 old_keycode;
@@ -257,10 +251,8 @@ static int __devinit dm355evm_keys_probe(struct platform_device *pdev)
/* REVISIT: flush the event queue? */
- status = request_threaded_irq(keys->irq,
- dm355evm_keys_hardirq, dm355evm_keys_irq,
- IRQF_TRIGGER_FALLING,
- dev_name(&pdev->dev), keys);
+ status = request_threaded_irq(keys->irq, NULL, dm355evm_keys_irq,
+ IRQF_TRIGGER_FALLING, dev_name(&pdev->dev), keys);
if (status < 0)
goto fail1;
diff --git a/drivers/input/mouse/sentelic.c b/drivers/input/mouse/sentelic.c
index 84e2fc04d11..f84cbd97c88 100644
--- a/drivers/input/mouse/sentelic.c
+++ b/drivers/input/mouse/sentelic.c
@@ -92,7 +92,8 @@ static int fsp_reg_read(struct psmouse *psmouse, int reg_addr, int *reg_val)
*/
ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE);
psmouse_set_state(psmouse, PSMOUSE_CMD_MODE);
- mutex_lock(&ps2dev->cmd_mutex);
+
+ ps2_begin_command(ps2dev);
if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
goto out;
@@ -126,7 +127,7 @@ static int fsp_reg_read(struct psmouse *psmouse, int reg_addr, int *reg_val)
rc = 0;
out:
- mutex_unlock(&ps2dev->cmd_mutex);
+ ps2_end_command(ps2dev);
ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE);
psmouse_set_state(psmouse, PSMOUSE_ACTIVATED);
dev_dbg(&ps2dev->serio->dev, "READ REG: 0x%02x is 0x%02x (rc = %d)\n",
@@ -140,7 +141,7 @@ static int fsp_reg_write(struct psmouse *psmouse, int reg_addr, int reg_val)
unsigned char v;
int rc = -1;
- mutex_lock(&ps2dev->cmd_mutex);
+ ps2_begin_command(ps2dev);
if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
goto out;
@@ -179,7 +180,7 @@ static int fsp_reg_write(struct psmouse *psmouse, int reg_addr, int reg_val)
rc = 0;
out:
- mutex_unlock(&ps2dev->cmd_mutex);
+ ps2_end_command(ps2dev);
dev_dbg(&ps2dev->serio->dev, "WRITE REG: 0x%02x to 0x%02x (rc = %d)\n",
reg_addr, reg_val, rc);
return rc;
@@ -214,7 +215,8 @@ static int fsp_page_reg_read(struct psmouse *psmouse, int *reg_val)
ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE);
psmouse_set_state(psmouse, PSMOUSE_CMD_MODE);
- mutex_lock(&ps2dev->cmd_mutex);
+
+ ps2_begin_command(ps2dev);
if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
goto out;
@@ -236,7 +238,7 @@ static int fsp_page_reg_read(struct psmouse *psmouse, int *reg_val)
rc = 0;
out:
- mutex_unlock(&ps2dev->cmd_mutex);
+ ps2_end_command(ps2dev);
ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE);
psmouse_set_state(psmouse, PSMOUSE_ACTIVATED);
dev_dbg(&ps2dev->serio->dev, "READ PAGE REG: 0x%02x (rc = %d)\n",
@@ -250,7 +252,7 @@ static int fsp_page_reg_write(struct psmouse *psmouse, int reg_val)
unsigned char v;
int rc = -1;
- mutex_lock(&ps2dev->cmd_mutex);
+ ps2_begin_command(ps2dev);
if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
goto out;
@@ -275,7 +277,7 @@ static int fsp_page_reg_write(struct psmouse *psmouse, int reg_val)
rc = 0;
out:
- mutex_unlock(&ps2dev->cmd_mutex);
+ ps2_end_command(ps2dev);
dev_dbg(&ps2dev->serio->dev, "WRITE PAGE REG: to 0x%02x (rc = %d)\n",
reg_val, rc);
return rc;
diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c
index eac9fdde7ee..7283c78044a 100644
--- a/drivers/input/mouse/synaptics_i2c.c
+++ b/drivers/input/mouse/synaptics_i2c.c
@@ -203,7 +203,7 @@ MODULE_PARM_DESC(no_filter, "No Filter. Default = 0 (off)");
* and the irq configuration should be set to Falling Edge Trigger
*/
/* Control IRQ / Polling option */
-static int polling_req;
+static bool polling_req;
module_param(polling_req, bool, 0444);
MODULE_PARM_DESC(polling_req, "Request Polling. Default = 0 (use irq)");
@@ -217,6 +217,7 @@ struct synaptics_i2c {
struct i2c_client *client;
struct input_dev *input;
struct delayed_work dwork;
+ spinlock_t lock;
int no_data_count;
int no_decel_param;
int reduce_report_param;
@@ -366,17 +367,28 @@ static bool synaptics_i2c_get_input(struct synaptics_i2c *touch)
return xy_delta || gesture;
}
-static irqreturn_t synaptics_i2c_irq(int irq, void *dev_id)
+static void synaptics_i2c_reschedule_work(struct synaptics_i2c *touch,
+ unsigned long delay)
{
- struct synaptics_i2c *touch = dev_id;
+ unsigned long flags;
+
+ spin_lock_irqsave(&touch->lock, flags);
/*
- * We want to have the work run immediately but it might have
- * already been scheduled with a delay, that's why we have to
- * cancel it first.
+ * If work is already scheduled then subsequent schedules will not
+ * change the scheduled time that's why we have to cancel it first.
*/
- cancel_delayed_work(&touch->dwork);
- schedule_delayed_work(&touch->dwork, 0);
+ __cancel_delayed_work(&touch->dwork);
+ schedule_delayed_work(&touch->dwork, delay);
+
+ spin_unlock_irqrestore(&touch->lock, flags);
+}
+
+static irqreturn_t synaptics_i2c_irq(int irq, void *dev_id)
+{
+ struct synaptics_i2c *touch = dev_id;
+
+ synaptics_i2c_reschedule_work(touch, 0);
return IRQ_HANDLED;
}
@@ -452,7 +464,7 @@ static void synaptics_i2c_work_handler(struct work_struct *work)
* We poll the device once in THREAD_IRQ_SLEEP_SECS and
* if error is detected, we try to reset and reconfigure the touchpad.
*/
- schedule_delayed_work(&touch->dwork, delay);
+ synaptics_i2c_reschedule_work(touch, delay);
}
static int synaptics_i2c_open(struct input_dev *input)
@@ -465,8 +477,8 @@ static int synaptics_i2c_open(struct input_dev *input)
return ret;
if (polling_req)
- schedule_delayed_work(&touch->dwork,
- msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
+ synaptics_i2c_reschedule_work(touch,
+ msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
return 0;
}
@@ -521,6 +533,7 @@ struct synaptics_i2c *synaptics_i2c_touch_create(struct i2c_client *client)
touch->scan_rate_param = scan_rate;
set_scan_rate(touch, scan_rate);
INIT_DELAYED_WORK(&touch->dwork, synaptics_i2c_work_handler);
+ spin_lock_init(&touch->lock);
return touch;
}
@@ -535,14 +548,12 @@ static int __devinit synaptics_i2c_probe(struct i2c_client *client,
if (!touch)
return -ENOMEM;
- i2c_set_clientdata(client, touch);
-
ret = synaptics_i2c_reset_config(client);
if (ret)
goto err_mem_free;
if (client->irq < 1)
- polling_req = 1;
+ polling_req = true;
touch->input = input_allocate_device();
if (!touch->input) {
@@ -563,7 +574,7 @@ static int __devinit synaptics_i2c_probe(struct i2c_client *client,
dev_warn(&touch->client->dev,
"IRQ request failed: %d, "
"falling back to polling\n", ret);
- polling_req = 1;
+ polling_req = true;
synaptics_i2c_reg_set(touch->client,
INTERRUPT_EN_REG, 0);
}
@@ -580,12 +591,14 @@ static int __devinit synaptics_i2c_probe(struct i2c_client *client,
"Input device register failed: %d\n", ret);
goto err_input_free;
}
+
+ i2c_set_clientdata(client, touch);
+
return 0;
err_input_free:
input_free_device(touch->input);
err_mem_free:
- i2c_set_clientdata(client, NULL);
kfree(touch);
return ret;
@@ -596,7 +609,7 @@ static int __devexit synaptics_i2c_remove(struct i2c_client *client)
struct synaptics_i2c *touch = i2c_get_clientdata(client);
if (!polling_req)
- free_irq(touch->client->irq, touch);
+ free_irq(client->irq, touch);
input_unregister_device(touch->input);
i2c_set_clientdata(client, NULL);
@@ -627,8 +640,8 @@ static int synaptics_i2c_resume(struct i2c_client *client)
if (ret)
return ret;
- schedule_delayed_work(&touch->dwork,
- msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
+ synaptics_i2c_reschedule_work(touch,
+ msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
return 0;
}
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index eb3ff94af58..bc56e52b945 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -87,8 +87,22 @@ static bool i8042_bypass_aux_irq_test;
#include "i8042.h"
+/*
+ * i8042_lock protects serialization between i8042_command and
+ * the interrupt handler.
+ */
static DEFINE_SPINLOCK(i8042_lock);
+/*
+ * Writers to AUX and KBD ports as well as users issuing i8042_command
+ * directly should acquire i8042_mutex (by means of calling
+ * i8042_lock_chip() and i8042_unlock_chip() helpers) to ensure that
+ * they do not disturb each other (unfortunately in many i8042
+ * implementations a write to one of the ports will immediately abort
+ * a command that is being processed by another port).
+ */
+static DEFINE_MUTEX(i8042_mutex);
+
struct i8042_port {
struct serio *serio;
int irq;
@@ -113,6 +127,18 @@ static struct platform_device *i8042_platform_device;
static irqreturn_t i8042_interrupt(int irq, void *dev_id);
+void i8042_lock_chip(void)
+{
+ mutex_lock(&i8042_mutex);
+}
+EXPORT_SYMBOL(i8042_lock_chip);
+
+void i8042_unlock_chip(void)
+{
+ mutex_unlock(&i8042_mutex);
+}
+EXPORT_SYMBOL(i8042_unlock_chip);
+
/*
* The i8042_wait_read() and i8042_wait_write functions wait for the i8042 to
* be ready for reading values from it / writing values to it.
@@ -1161,6 +1187,21 @@ static void __devexit i8042_unregister_ports(void)
}
}
+/*
+ * Checks whether port belongs to i8042 controller.
+ */
+bool i8042_check_port_owner(const struct serio *port)
+{
+ int i;
+
+ for (i = 0; i < I8042_NUM_PORTS; i++)
+ if (i8042_ports[i].serio == port)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL(i8042_check_port_owner);
+
static void i8042_free_irqs(void)
{
if (i8042_aux_irq_registered)
diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c
index 3a95b508bf2..769ba65a585 100644
--- a/drivers/input/serio/libps2.c
+++ b/drivers/input/serio/libps2.c
@@ -17,6 +17,7 @@
#include <linux/interrupt.h>
#include <linux/input.h>
#include <linux/serio.h>
+#include <linux/i8042.h>
#include <linux/init.h>
#include <linux/libps2.h>
@@ -54,6 +55,24 @@ int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout)
}
EXPORT_SYMBOL(ps2_sendbyte);
+void ps2_begin_command(struct ps2dev *ps2dev)
+{
+ mutex_lock(&ps2dev->cmd_mutex);
+
+ if (i8042_check_port_owner(ps2dev->serio))
+ i8042_lock_chip();
+}
+EXPORT_SYMBOL(ps2_begin_command);
+
+void ps2_end_command(struct ps2dev *ps2dev)
+{
+ if (i8042_check_port_owner(ps2dev->serio))
+ i8042_unlock_chip();
+
+ mutex_unlock(&ps2dev->cmd_mutex);
+}
+EXPORT_SYMBOL(ps2_end_command);
+
/*
* ps2_drain() waits for device to transmit requested number of bytes
* and discards them.
@@ -66,7 +85,7 @@ void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout)
maxbytes = sizeof(ps2dev->cmdbuf);
}
- mutex_lock(&ps2dev->cmd_mutex);
+ ps2_begin_command(ps2dev);
serio_pause_rx(ps2dev->serio);
ps2dev->flags = PS2_FLAG_CMD;
@@ -76,7 +95,8 @@ void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout)
wait_event_timeout(ps2dev->wait,
!(ps2dev->flags & PS2_FLAG_CMD),
msecs_to_jiffies(timeout));
- mutex_unlock(&ps2dev->cmd_mutex);
+
+ ps2_end_command(ps2dev);
}
EXPORT_SYMBOL(ps2_drain);
@@ -237,9 +257,9 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command)
{
int rc;
- mutex_lock(&ps2dev->cmd_mutex);
+ ps2_begin_command(ps2dev);
rc = __ps2_command(ps2dev, param, command);
- mutex_unlock(&ps2dev->cmd_mutex);
+ ps2_end_command(ps2dev);
return rc;
}
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index ab02d72afbf..8cc453c85ea 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -48,8 +48,8 @@ config TOUCHSCREEN_AD7879_I2C
select TOUCHSCREEN_AD7879
help
Say Y here if you have a touchscreen interface using the
- AD7879-1 controller, and your board-specific initialization
- code includes that in its table of I2C devices.
+ AD7879-1/AD7889-1 controller, and your board-specific
+ initialization code includes that in its table of I2C devices.
If unsure, say N (but it's safe to say "Y").
@@ -62,7 +62,7 @@ config TOUCHSCREEN_AD7879_SPI
select TOUCHSCREEN_AD7879
help
Say Y here if you have a touchscreen interface using the
- AD7879 controller, and your board-specific initialization
+ AD7879/AD7889 controller, and your board-specific initialization
code includes that in its table of SPI devices.
If unsure, say N (but it's safe to say "Y").
@@ -169,6 +169,17 @@ config TOUCHSCREEN_WACOM_W8001
To compile this driver as a module, choose M here: the
module will be called wacom_w8001.
+config TOUCHSCREEN_MCS5000
+ tristate "MELFAS MCS-5000 touchscreen"
+ depends on I2C
+ help
+ Say Y here if you have the MELFAS MCS-5000 touchscreen controller
+ chip in your system.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mcs5000_ts.
config TOUCHSCREEN_MTOUCH
tristate "MicroTouch serial touchscreens"
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 4599bf7ad81..15fa62cffc7 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_TOUCHSCREEN_EETI) += eeti_ts.o
obj-$(CONFIG_TOUCHSCREEN_ELO) += elo.o
obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o
obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o
+obj-$(CONFIG_TOUCHSCREEN_MCS5000) += mcs5000_ts.o
obj-$(CONFIG_TOUCHSCREEN_MIGOR) += migor_ts.o
obj-$(CONFIG_TOUCHSCREEN_MTOUCH) += mtouch.o
obj-$(CONFIG_TOUCHSCREEN_MK712) += mk712.o
diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c
index 19b4db7e974..f06332c9e21 100644
--- a/drivers/input/touchscreen/ad7879.c
+++ b/drivers/input/touchscreen/ad7879.c
@@ -1,7 +1,8 @@
/*
- * Copyright (C) 2008 Michael Hennerich, Analog Devices Inc.
+ * Copyright (C) 2008-2009 Michael Hennerich, Analog Devices Inc.
*
- * Description: AD7879 based touchscreen, and GPIO driver (I2C/SPI Interface)
+ * Description: AD7879/AD7889 based touchscreen, and GPIO driver
+ * (I2C/SPI Interface)
*
* Bugs: Enter bugs at http://blackfin.uclinux.org/
*
@@ -747,6 +748,7 @@ static int __devexit ad7879_remove(struct i2c_client *client)
static const struct i2c_device_id ad7879_id[] = {
{ "ad7879", 0 },
+ { "ad7889", 0 },
{ }
};
MODULE_DEVICE_TABLE(i2c, ad7879_id);
diff --git a/drivers/input/touchscreen/mcs5000_ts.c b/drivers/input/touchscreen/mcs5000_ts.c
new file mode 100644
index 00000000000..4c28b89757f
--- /dev/null
+++ b/drivers/input/touchscreen/mcs5000_ts.c
@@ -0,0 +1,318 @@
+/*
+ * mcs5000_ts.c - Touchscreen driver for MELFAS MCS-5000 controller
+ *
+ * Copyright (C) 2009 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ * Based on wm97xx-core.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/i2c/mcs5000_ts.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/irq.h>
+
+/* Registers */
+#define MCS5000_TS_STATUS 0x00
+#define STATUS_OFFSET 0
+#define STATUS_NO (0 << STATUS_OFFSET)
+#define STATUS_INIT (1 << STATUS_OFFSET)
+#define STATUS_SENSING (2 << STATUS_OFFSET)
+#define STATUS_COORD (3 << STATUS_OFFSET)
+#define STATUS_GESTURE (4 << STATUS_OFFSET)
+#define ERROR_OFFSET 4
+#define ERROR_NO (0 << ERROR_OFFSET)
+#define ERROR_POWER_ON_RESET (1 << ERROR_OFFSET)
+#define ERROR_INT_RESET (2 << ERROR_OFFSET)
+#define ERROR_EXT_RESET (3 << ERROR_OFFSET)
+#define ERROR_INVALID_REG_ADDRESS (8 << ERROR_OFFSET)
+#define ERROR_INVALID_REG_VALUE (9 << ERROR_OFFSET)
+
+#define MCS5000_TS_OP_MODE 0x01
+#define RESET_OFFSET 0
+#define RESET_NO (0 << RESET_OFFSET)
+#define RESET_EXT_SOFT (1 << RESET_OFFSET)
+#define OP_MODE_OFFSET 1
+#define OP_MODE_SLEEP (0 << OP_MODE_OFFSET)
+#define OP_MODE_ACTIVE (1 << OP_MODE_OFFSET)
+#define GESTURE_OFFSET 4
+#define GESTURE_DISABLE (0 << GESTURE_OFFSET)
+#define GESTURE_ENABLE (1 << GESTURE_OFFSET)
+#define PROXIMITY_OFFSET 5
+#define PROXIMITY_DISABLE (0 << PROXIMITY_OFFSET)
+#define PROXIMITY_ENABLE (1 << PROXIMITY_OFFSET)
+#define SCAN_MODE_OFFSET 6
+#define SCAN_MODE_INTERRUPT (0 << SCAN_MODE_OFFSET)
+#define SCAN_MODE_POLLING (1 << SCAN_MODE_OFFSET)
+#define REPORT_RATE_OFFSET 7
+#define REPORT_RATE_40 (0 << REPORT_RATE_OFFSET)
+#define REPORT_RATE_80 (1 << REPORT_RATE_OFFSET)
+
+#define MCS5000_TS_SENS_CTL 0x02
+#define MCS5000_TS_FILTER_CTL 0x03
+#define PRI_FILTER_OFFSET 0
+#define SEC_FILTER_OFFSET 4
+
+#define MCS5000_TS_X_SIZE_UPPER 0x08
+#define MCS5000_TS_X_SIZE_LOWER 0x09
+#define MCS5000_TS_Y_SIZE_UPPER 0x0A
+#define MCS5000_TS_Y_SIZE_LOWER 0x0B
+
+#define MCS5000_TS_INPUT_INFO 0x10
+#define INPUT_TYPE_OFFSET 0
+#define INPUT_TYPE_NONTOUCH (0 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_SINGLE (1 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_DUAL (2 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_PALM (3 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_PROXIMITY (7 << INPUT_TYPE_OFFSET)
+#define GESTURE_CODE_OFFSET 3
+#define GESTURE_CODE_NO (0 << GESTURE_CODE_OFFSET)
+
+#define MCS5000_TS_X_POS_UPPER 0x11
+#define MCS5000_TS_X_POS_LOWER 0x12
+#define MCS5000_TS_Y_POS_UPPER 0x13
+#define MCS5000_TS_Y_POS_LOWER 0x14
+#define MCS5000_TS_Z_POS 0x15
+#define MCS5000_TS_WIDTH 0x16
+#define MCS5000_TS_GESTURE_VAL 0x17
+#define MCS5000_TS_MODULE_REV 0x20
+#define MCS5000_TS_FIRMWARE_VER 0x21
+
+/* Touchscreen absolute values */
+#define MCS5000_MAX_XC 0x3ff
+#define MCS5000_MAX_YC 0x3ff
+
+enum mcs5000_ts_read_offset {
+ READ_INPUT_INFO,
+ READ_X_POS_UPPER,
+ READ_X_POS_LOWER,
+ READ_Y_POS_UPPER,
+ READ_Y_POS_LOWER,
+ READ_BLOCK_SIZE,
+};
+
+/* Each client has this additional data */
+struct mcs5000_ts_data {
+ struct i2c_client *client;
+ struct input_dev *input_dev;
+ const struct mcs5000_ts_platform_data *platform_data;
+};
+
+static irqreturn_t mcs5000_ts_interrupt(int irq, void *dev_id)
+{
+ struct mcs5000_ts_data *data = dev_id;
+ struct i2c_client *client = data->client;
+ u8 buffer[READ_BLOCK_SIZE];
+ int err;
+ int x;
+ int y;
+
+ err = i2c_smbus_read_i2c_block_data(client, MCS5000_TS_INPUT_INFO,
+ READ_BLOCK_SIZE, buffer);
+ if (err < 0) {
+ dev_err(&client->dev, "%s, err[%d]\n", __func__, err);
+ goto out;
+ }
+
+ switch (buffer[READ_INPUT_INFO]) {
+ case INPUT_TYPE_NONTOUCH:
+ input_report_key(data->input_dev, BTN_TOUCH, 0);
+ input_sync(data->input_dev);
+ break;
+
+ case INPUT_TYPE_SINGLE:
+ x = (buffer[READ_X_POS_UPPER] << 8) | buffer[READ_X_POS_LOWER];
+ y = (buffer[READ_Y_POS_UPPER] << 8) | buffer[READ_Y_POS_LOWER];
+
+ input_report_key(data->input_dev, BTN_TOUCH, 1);
+ input_report_abs(data->input_dev, ABS_X, x);
+ input_report_abs(data->input_dev, ABS_Y, y);
+ input_sync(data->input_dev);
+ break;
+
+ case INPUT_TYPE_DUAL:
+ /* TODO */
+ break;
+
+ case INPUT_TYPE_PALM:
+ /* TODO */
+ break;
+
+ case INPUT_TYPE_PROXIMITY:
+ /* TODO */
+ break;
+
+ default:
+ dev_err(&client->dev, "Unknown ts input type %d\n",
+ buffer[READ_INPUT_INFO]);
+ break;
+ }
+
+ out:
+ return IRQ_HANDLED;
+}
+
+static void mcs5000_ts_phys_init(struct mcs5000_ts_data *data)
+{
+ const struct mcs5000_ts_platform_data *platform_data =
+ data->platform_data;
+ struct i2c_client *client = data->client;
+
+ /* Touch reset & sleep mode */
+ i2c_smbus_write_byte_data(client, MCS5000_TS_OP_MODE,
+ RESET_EXT_SOFT | OP_MODE_SLEEP);
+
+ /* Touch size */
+ i2c_smbus_write_byte_data(client, MCS5000_TS_X_SIZE_UPPER,
+ platform_data->x_size >> 8);
+ i2c_smbus_write_byte_data(client, MCS5000_TS_X_SIZE_LOWER,
+ platform_data->x_size & 0xff);
+ i2c_smbus_write_byte_data(client, MCS5000_TS_Y_SIZE_UPPER,
+ platform_data->y_size >> 8);
+ i2c_smbus_write_byte_data(client, MCS5000_TS_Y_SIZE_LOWER,
+ platform_data->y_size & 0xff);
+
+ /* Touch active mode & 80 report rate */
+ i2c_smbus_write_byte_data(data->client, MCS5000_TS_OP_MODE,
+ OP_MODE_ACTIVE | REPORT_RATE_80);
+}
+
+static int __devinit mcs5000_ts_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct mcs5000_ts_data *data;
+ struct input_dev *input_dev;
+ int ret;
+
+ if (!client->dev.platform_data)
+ return -EINVAL;
+
+ data = kzalloc(sizeof(struct mcs5000_ts_data), GFP_KERNEL);
+ input_dev = input_allocate_device();
+ if (!data || !input_dev) {
+ dev_err(&client->dev, "Failed to allocate memory\n");
+ ret = -ENOMEM;
+ goto err_free_mem;
+ }
+
+ data->client = client;
+ data->input_dev = input_dev;
+ data->platform_data = client->dev.platform_data;
+
+ input_dev->name = "MELPAS MCS-5000 Touchscreen";
+ input_dev->id.bustype = BUS_I2C;
+ input_dev->dev.parent = &client->dev;
+
+ __set_bit(EV_ABS, input_dev->evbit);
+ __set_bit(EV_KEY, input_dev->evbit);
+ __set_bit(BTN_TOUCH, input_dev->keybit);
+ input_set_abs_params(input_dev, ABS_X, 0, MCS5000_MAX_XC, 0, 0);
+ input_set_abs_params(input_dev, ABS_Y, 0, MCS5000_MAX_YC, 0, 0);
+
+ input_set_drvdata(input_dev, data);
+
+ if (data->platform_data->cfg_pin)
+ data->platform_data->cfg_pin();
+
+ ret = request_threaded_irq(client->irq, NULL, mcs5000_ts_interrupt,
+ IRQF_TRIGGER_LOW | IRQF_ONESHOT, "mcs5000_ts", data);
+
+ if (ret < 0) {
+ dev_err(&client->dev, "Failed to register interrupt\n");
+ goto err_free_mem;
+ }
+
+ ret = input_register_device(data->input_dev);
+ if (ret < 0)
+ goto err_free_irq;
+
+ mcs5000_ts_phys_init(data);
+ i2c_set_clientdata(client, data);
+
+ return 0;
+
+err_free_irq:
+ free_irq(client->irq, data);
+err_free_mem:
+ input_free_device(input_dev);
+ kfree(data);
+ return ret;
+}
+
+static int __devexit mcs5000_ts_remove(struct i2c_client *client)
+{
+ struct mcs5000_ts_data *data = i2c_get_clientdata(client);
+
+ free_irq(client->irq, data);
+ input_unregister_device(data->input_dev);
+ kfree(data);
+ i2c_set_clientdata(client, NULL);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int mcs5000_ts_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+ /* Touch sleep mode */
+ i2c_smbus_write_byte_data(client, MCS5000_TS_OP_MODE, OP_MODE_SLEEP);
+
+ return 0;
+}
+
+static int mcs5000_ts_resume(struct i2c_client *client)
+{
+ struct mcs5000_ts_data *data = i2c_get_clientdata(client);
+
+ mcs5000_ts_phys_init(data);
+
+ return 0;
+}
+#else
+#define mcs5000_ts_suspend NULL
+#define mcs5000_ts_resume NULL
+#endif
+
+static const struct i2c_device_id mcs5000_ts_id[] = {
+ { "mcs5000_ts", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, mcs5000_ts_id);
+
+static struct i2c_driver mcs5000_ts_driver = {
+ .probe = mcs5000_ts_probe,
+ .remove = __devexit_p(mcs5000_ts_remove),
+ .suspend = mcs5000_ts_suspend,
+ .resume = mcs5000_ts_resume,
+ .driver = {
+ .name = "mcs5000_ts",
+ },
+ .id_table = mcs5000_ts_id,
+};
+
+static int __init mcs5000_ts_init(void)
+{
+ return i2c_add_driver(&mcs5000_ts_driver);
+}
+
+static void __exit mcs5000_ts_exit(void)
+{
+ i2c_del_driver(&mcs5000_ts_driver);
+}
+
+module_init(mcs5000_ts_init);
+module_exit(mcs5000_ts_exit);
+
+/* Module information */
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_DESCRIPTION("Touchscreen driver for MELFAS MCS-5000 controller");
+MODULE_LICENSE("GPL");
diff --git a/drivers/leds/leds-clevo-mail.c b/drivers/leds/leds-clevo-mail.c
index 1813c84ea5f..f2242db5401 100644
--- a/drivers/leds/leds-clevo-mail.c
+++ b/drivers/leds/leds-clevo-mail.c
@@ -93,6 +93,8 @@ static struct dmi_system_id __initdata mail_led_whitelist[] = {
static void clevo_mail_led_set(struct led_classdev *led_cdev,
enum led_brightness value)
{
+ i8042_lock_chip();
+
if (value == LED_OFF)
i8042_command(NULL, CLEVO_MAIL_LED_OFF);
else if (value <= LED_HALF)
@@ -100,6 +102,8 @@ static void clevo_mail_led_set(struct led_classdev *led_cdev,
else
i8042_command(NULL, CLEVO_MAIL_LED_BLINK_1HZ);
+ i8042_unlock_chip();
+
}
static int clevo_mail_led_blink(struct led_classdev *led_cdev,
@@ -108,6 +112,8 @@ static int clevo_mail_led_blink(struct led_classdev *led_cdev,
{
int status = -EINVAL;
+ i8042_lock_chip();
+
if (*delay_on == 0 /* ms */ && *delay_off == 0 /* ms */) {
/* Special case: the leds subsystem requested us to
* chose one user friendly blinking of the LED, and
@@ -135,6 +141,8 @@ static int clevo_mail_led_blink(struct led_classdev *led_cdev,
*delay_on, *delay_off);
}
+ i8042_unlock_chip();
+
return status;
}
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 020f9573fd8..2158377a135 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -124,6 +124,8 @@ config MD_RAID456
select MD_RAID6_PQ
select ASYNC_MEMCPY
select ASYNC_XOR
+ select ASYNC_PQ
+ select ASYNC_RAID6_RECOV
---help---
A RAID-5 set of N drives with a capacity of C MB per drive provides
the capacity of C * (N - 1) MB, and protects against a failure
@@ -152,9 +154,33 @@ config MD_RAID456
If unsure, say Y.
+config MULTICORE_RAID456
+ bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
+ depends on MD_RAID456
+ depends on SMP
+ depends on EXPERIMENTAL
+ ---help---
+ Enable the raid456 module to dispatch per-stripe raid operations to a
+ thread pool.
+
+ If unsure, say N.
+
config MD_RAID6_PQ
tristate
+config ASYNC_RAID6_TEST
+ tristate "Self test for hardware accelerated raid6 recovery"
+ depends on MD_RAID6_PQ
+ select ASYNC_RAID6_RECOV
+ ---help---
+ This is a one-shot self test that permutes through the
+ recovery of all the possible two-disk failure scenarios for an
+ N-disk array. Recovery is performed with the asynchronous
+ raid6 recovery routines, and will optionally use an offload
+ engine if one is available.
+
+ If unsure, say N.
+
config MD_MULTIPATH
tristate "Multipath I/O support"
depends on BLK_DEV_MD
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 3319c2fec28..6986b0059d2 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -108,6 +108,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
* allocated while we're using it
*/
static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create)
+__releases(bitmap->lock)
+__acquires(bitmap->lock)
{
unsigned char *mappage;
@@ -325,7 +327,6 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
return 0;
bad_alignment:
- rcu_read_unlock();
return -EINVAL;
}
@@ -1207,6 +1208,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
sector_t offset, int *blocks,
int create)
+__releases(bitmap->lock)
+__acquires(bitmap->lock)
{
/* If 'create', we might release the lock and reclaim it.
* The lock must have been taken with interrupts enabled.
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index ea484290544..1ceceb334d5 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -108,6 +108,9 @@ static int linear_congested(void *data, int bits)
linear_conf_t *conf;
int i, ret = 0;
+ if (mddev_congested(mddev, bits))
+ return 1;
+
rcu_read_lock();
conf = rcu_dereference(mddev->private);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6aa497e4baf..26ba42a7912 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -262,6 +262,12 @@ static void mddev_resume(mddev_t *mddev)
mddev->pers->quiesce(mddev, 0);
}
+int mddev_congested(mddev_t *mddev, int bits)
+{
+ return mddev->suspended;
+}
+EXPORT_SYMBOL(mddev_congested);
+
static inline mddev_t *mddev_get(mddev_t *mddev)
{
@@ -4218,7 +4224,7 @@ static int do_md_run(mddev_t * mddev)
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev->sync_thread = md_register_thread(md_do_sync,
mddev,
- "%s_resync");
+ "resync");
if (!mddev->sync_thread) {
printk(KERN_ERR "%s: could not start resync"
" thread...\n",
@@ -4575,10 +4581,10 @@ static int get_version(void __user * arg)
static int get_array_info(mddev_t * mddev, void __user * arg)
{
mdu_array_info_t info;
- int nr,working,active,failed,spare;
+ int nr,working,insync,failed,spare;
mdk_rdev_t *rdev;
- nr=working=active=failed=spare=0;
+ nr=working=insync=failed=spare=0;
list_for_each_entry(rdev, &mddev->disks, same_set) {
nr++;
if (test_bit(Faulty, &rdev->flags))
@@ -4586,7 +4592,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
else {
working++;
if (test_bit(In_sync, &rdev->flags))
- active++;
+ insync++;
else
spare++;
}
@@ -4611,7 +4617,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
info.state = (1<<MD_SB_CLEAN);
if (mddev->bitmap && mddev->bitmap_offset)
info.state = (1<<MD_SB_BITMAP_PRESENT);
- info.active_disks = active;
+ info.active_disks = insync;
info.working_disks = working;
info.failed_disks = failed;
info.spare_disks = spare;
@@ -4721,7 +4727,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
if (!list_empty(&mddev->disks)) {
mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
mdk_rdev_t, same_set);
- int err = super_types[mddev->major_version]
+ err = super_types[mddev->major_version]
.load_super(rdev, rdev0, mddev->minor_version);
if (err < 0) {
printk(KERN_WARNING
@@ -5631,7 +5637,10 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
thread->run = run;
thread->mddev = mddev;
thread->timeout = MAX_SCHEDULE_TIMEOUT;
- thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev));
+ thread->tsk = kthread_run(md_thread, thread,
+ "%s_%s",
+ mdname(thread->mddev),
+ name ?: mddev->pers->name);
if (IS_ERR(thread->tsk)) {
kfree(thread);
return NULL;
@@ -6745,7 +6754,7 @@ void md_check_recovery(mddev_t *mddev)
}
mddev->sync_thread = md_register_thread(md_do_sync,
mddev,
- "%s_resync");
+ "resync");
if (!mddev->sync_thread) {
printk(KERN_ERR "%s: could not start resync"
" thread...\n",
diff --git a/drivers/md/md.h b/drivers/md/md.h
index f55d2ff9513..f184b69ef33 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -430,6 +430,7 @@ extern void md_write_end(mddev_t *mddev);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
+extern int mddev_congested(mddev_t *mddev, int bits);
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
sector_t sector, int size, struct page *page);
extern void md_super_wait(mddev_t *mddev);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index d2d3fd54cc6..ee7646f974a 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -150,7 +150,6 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
}
mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
- memset(mp_bh, 0, sizeof(*mp_bh));
mp_bh->master_bio = bio;
mp_bh->mddev = mddev;
@@ -199,6 +198,9 @@ static int multipath_congested(void *data, int bits)
multipath_conf_t *conf = mddev->private;
int i, ret = 0;
+ if (mddev_congested(mddev, bits))
+ return 1;
+
rcu_read_lock();
for (i = 0; i < mddev->raid_disks ; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
@@ -504,7 +506,7 @@ static int multipath_run (mddev_t *mddev)
}
{
- mddev->thread = md_register_thread(multipathd, mddev, "%s_multipath");
+ mddev->thread = md_register_thread(multipathd, mddev, NULL);
if (!mddev->thread) {
printk(KERN_ERR "multipath: couldn't allocate thread"
" for %s\n", mdname(mddev));
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index f845ed98fec..d3a4ce06015 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -44,6 +44,9 @@ static int raid0_congested(void *data, int bits)
mdk_rdev_t **devlist = conf->devlist;
int i, ret = 0;
+ if (mddev_congested(mddev, bits))
+ return 1;
+
for (i = 0; i < mddev->raid_disks && !ret ; i++) {
struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
@@ -86,7 +89,7 @@ static void dump_zones(mddev_t *mddev)
static int create_strip_zones(mddev_t *mddev)
{
- int i, c, j, err;
+ int i, c, err;
sector_t curr_zone_end, sectors;
mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
struct strip_zone *zone;
@@ -198,6 +201,8 @@ static int create_strip_zones(mddev_t *mddev)
/* now do the other zones */
for (i = 1; i < conf->nr_strip_zones; i++)
{
+ int j;
+
zone = conf->strip_zone + i;
dev = conf->devlist + i * mddev->raid_disks;
@@ -207,7 +212,6 @@ static int create_strip_zones(mddev_t *mddev)
c = 0;
for (j=0; j<cnt; j++) {
- char b[BDEVNAME_SIZE];
rdev = conf->devlist[j];
printk(KERN_INFO "raid0: checking %s ...",
bdevname(rdev->bdev, b));
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ff7ed333599..d1b9bd5fd4f 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -576,6 +576,9 @@ static int raid1_congested(void *data, int bits)
conf_t *conf = mddev->private;
int i, ret = 0;
+ if (mddev_congested(mddev, bits))
+ return 1;
+
rcu_read_lock();
for (i = 0; i < mddev->raid_disks; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
@@ -851,7 +854,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
read_bio->bi_bdev = mirror->rdev->bdev;
read_bio->bi_end_io = raid1_end_read_request;
- read_bio->bi_rw = READ | do_sync;
+ read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
read_bio->bi_private = r1_bio;
generic_make_request(read_bio);
@@ -943,7 +946,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
mbio->bi_end_io = raid1_end_write_request;
- mbio->bi_rw = WRITE | do_barriers | do_sync;
+ mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) |
+ (do_sync << BIO_RW_SYNCIO);
mbio->bi_private = r1_bio;
if (behind_pages) {
@@ -1623,7 +1627,8 @@ static void raid1d(mddev_t *mddev)
conf->mirrors[i].rdev->data_offset;
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
bio->bi_end_io = raid1_end_write_request;
- bio->bi_rw = WRITE | do_sync;
+ bio->bi_rw = WRITE |
+ (do_sync << BIO_RW_SYNCIO);
bio->bi_private = r1_bio;
r1_bio->bios[i] = bio;
generic_make_request(bio);
@@ -1672,7 +1677,7 @@ static void raid1d(mddev_t *mddev)
bio->bi_sector = r1_bio->sector + rdev->data_offset;
bio->bi_bdev = rdev->bdev;
bio->bi_end_io = raid1_end_read_request;
- bio->bi_rw = READ | do_sync;
+ bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
bio->bi_private = r1_bio;
unplug = 1;
generic_make_request(bio);
@@ -2047,7 +2052,7 @@ static int run(mddev_t *mddev)
conf->last_used = j;
- mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1");
+ mddev->thread = md_register_thread(raid1d, mddev, NULL);
if (!mddev->thread) {
printk(KERN_ERR
"raid1: couldn't allocate thread for %s\n",
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d0a2152e064..51c4c5c4d87 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -631,6 +631,8 @@ static int raid10_congested(void *data, int bits)
conf_t *conf = mddev->private;
int i, ret = 0;
+ if (mddev_congested(mddev, bits))
+ return 1;
rcu_read_lock();
for (i = 0; i < mddev->raid_disks && ret == 0; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
@@ -882,7 +884,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
mirror->rdev->data_offset;
read_bio->bi_bdev = mirror->rdev->bdev;
read_bio->bi_end_io = raid10_end_read_request;
- read_bio->bi_rw = READ | do_sync;
+ read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
read_bio->bi_private = r10_bio;
generic_make_request(read_bio);
@@ -950,7 +952,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
conf->mirrors[d].rdev->data_offset;
mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
- mbio->bi_rw = WRITE | do_sync;
+ mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO);
mbio->bi_private = r10_bio;
atomic_inc(&r10_bio->remaining);
@@ -1623,7 +1625,7 @@ static void raid10d(mddev_t *mddev)
bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
+ rdev->data_offset;
bio->bi_bdev = rdev->bdev;
- bio->bi_rw = READ | do_sync;
+ bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
bio->bi_private = r10_bio;
bio->bi_end_io = raid10_end_read_request;
unplug = 1;
@@ -1773,7 +1775,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
/* recovery... the complicated one */
- int i, j, k;
+ int j, k;
r10_bio = NULL;
for (i=0 ; i<conf->raid_disks; i++)
@@ -2188,7 +2190,7 @@ static int run(mddev_t *mddev)
}
- mddev->thread = md_register_thread(raid10d, mddev, "%s_raid10");
+ mddev->thread = md_register_thread(raid10d, mddev, NULL);
if (!mddev->thread) {
printk(KERN_ERR
"raid10: couldn't allocate thread for %s\n",
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 826eb346735..94829804ab7 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -47,7 +47,9 @@
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
+#include <linux/async.h>
#include <linux/seq_file.h>
+#include <linux/cpu.h>
#include "md.h"
#include "raid5.h"
#include "bitmap.h"
@@ -499,11 +501,18 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
struct page *bio_page;
int i;
int page_offset;
+ struct async_submit_ctl submit;
+ enum async_tx_flags flags = 0;
if (bio->bi_sector >= sector)
page_offset = (signed)(bio->bi_sector - sector) * 512;
else
page_offset = (signed)(sector - bio->bi_sector) * -512;
+
+ if (frombio)
+ flags |= ASYNC_TX_FENCE;
+ init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
+
bio_for_each_segment(bvl, bio, i) {
int len = bio_iovec_idx(bio, i)->bv_len;
int clen;
@@ -525,15 +534,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
bio_page = bio_iovec_idx(bio, i)->bv_page;
if (frombio)
tx = async_memcpy(page, bio_page, page_offset,
- b_offset, clen,
- ASYNC_TX_DEP_ACK,
- tx, NULL, NULL);
+ b_offset, clen, &submit);
else
tx = async_memcpy(bio_page, page, b_offset,
- page_offset, clen,
- ASYNC_TX_DEP_ACK,
- tx, NULL, NULL);
+ page_offset, clen, &submit);
}
+ /* chain the operations */
+ submit.depend_tx = tx;
+
if (clen < len) /* hit end of page */
break;
page_offset += len;
@@ -592,6 +600,7 @@ static void ops_run_biofill(struct stripe_head *sh)
{
struct dma_async_tx_descriptor *tx = NULL;
raid5_conf_t *conf = sh->raid_conf;
+ struct async_submit_ctl submit;
int i;
pr_debug("%s: stripe %llu\n", __func__,
@@ -615,22 +624,34 @@ static void ops_run_biofill(struct stripe_head *sh)
}
atomic_inc(&sh->count);
- async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
- ops_complete_biofill, sh);
+ init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
+ async_trigger_callback(&submit);
}
-static void ops_complete_compute5(void *stripe_head_ref)
+static void mark_target_uptodate(struct stripe_head *sh, int target)
{
- struct stripe_head *sh = stripe_head_ref;
- int target = sh->ops.target;
- struct r5dev *tgt = &sh->dev[target];
+ struct r5dev *tgt;
- pr_debug("%s: stripe %llu\n", __func__,
- (unsigned long long)sh->sector);
+ if (target < 0)
+ return;
+ tgt = &sh->dev[target];
set_bit(R5_UPTODATE, &tgt->flags);
BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
clear_bit(R5_Wantcompute, &tgt->flags);
+}
+
+static void ops_complete_compute(void *stripe_head_ref)
+{
+ struct stripe_head *sh = stripe_head_ref;
+
+ pr_debug("%s: stripe %llu\n", __func__,
+ (unsigned long long)sh->sector);
+
+ /* mark the computed target(s) as uptodate */
+ mark_target_uptodate(sh, sh->ops.target);
+ mark_target_uptodate(sh, sh->ops.target2);
+
clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
if (sh->check_state == check_state_compute_run)
sh->check_state = check_state_compute_result;
@@ -638,16 +659,24 @@ static void ops_complete_compute5(void *stripe_head_ref)
release_stripe(sh);
}
-static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
+/* return a pointer to the address conversion region of the scribble buffer */
+static addr_conv_t *to_addr_conv(struct stripe_head *sh,
+ struct raid5_percpu *percpu)
+{
+ return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
+}
+
+static struct dma_async_tx_descriptor *
+ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
{
- /* kernel stack size limits the total number of disks */
int disks = sh->disks;
- struct page *xor_srcs[disks];
+ struct page **xor_srcs = percpu->scribble;
int target = sh->ops.target;
struct r5dev *tgt = &sh->dev[target];
struct page *xor_dest = tgt->page;
int count = 0;
struct dma_async_tx_descriptor *tx;
+ struct async_submit_ctl submit;
int i;
pr_debug("%s: stripe %llu block: %d\n",
@@ -660,17 +689,215 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
atomic_inc(&sh->count);
+ init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
+ ops_complete_compute, sh, to_addr_conv(sh, percpu));
if (unlikely(count == 1))
- tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
- 0, NULL, ops_complete_compute5, sh);
+ tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
else
- tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- ASYNC_TX_XOR_ZERO_DST, NULL,
- ops_complete_compute5, sh);
+ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
return tx;
}
+/* set_syndrome_sources - populate source buffers for gen_syndrome
+ * @srcs - (struct page *) array with room for sh->disks + 2 entries
+ * @sh - stripe_head to parse
+ *
+ * Populates srcs in proper layout order for the stripe and returns the
+ * 'count' of sources to be used in a call to async_gen_syndrome. The P
+ * destination buffer is recorded in srcs[count] and the Q destination
+ * is recorded in srcs[count+1].
+ */
+static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
+{
+ int disks = sh->disks;
+ int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
+ int d0_idx = raid6_d0(sh);
+ int count;
+ int i;
+
+ for (i = 0; i < disks; i++)
+ srcs[i] = (void *)raid6_empty_zero_page;
+
+ count = 0;
+ i = d0_idx;
+ do {
+ int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+ srcs[slot] = sh->dev[i].page;
+ i = raid6_next_disk(i, disks);
+ } while (i != d0_idx);
+ BUG_ON(count != syndrome_disks);
+
+ return count;
+}
+
+static struct dma_async_tx_descriptor *
+ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
+{
+ int disks = sh->disks;
+ struct page **blocks = percpu->scribble;
+ int target;
+ int qd_idx = sh->qd_idx;
+ struct dma_async_tx_descriptor *tx;
+ struct async_submit_ctl submit;
+ struct r5dev *tgt;
+ struct page *dest;
+ int i;
+ int count;
+
+ if (sh->ops.target < 0)
+ target = sh->ops.target2;
+ else if (sh->ops.target2 < 0)
+ target = sh->ops.target;
+ else
+ /* we should only have one valid target */
+ BUG();
+ BUG_ON(target < 0);
+ pr_debug("%s: stripe %llu block: %d\n",
+ __func__, (unsigned long long)sh->sector, target);
+
+ tgt = &sh->dev[target];
+ BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
+ dest = tgt->page;
+
+ atomic_inc(&sh->count);
+
+ if (target == qd_idx) {
+ count = set_syndrome_sources(blocks, sh);
+ blocks[count] = NULL; /* regenerating p is not necessary */
+ BUG_ON(blocks[count+1] != dest); /* q should already be set */
+ init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+ ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
+ tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+ } else {
+ /* Compute any data- or p-drive using XOR */
+ count = 0;
+ for (i = disks; i-- ; ) {
+ if (i == target || i == qd_idx)
+ continue;
+ blocks[count++] = sh->dev[i].page;
+ }
+
+ init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+ NULL, ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
+ tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
+ }
+
+ return tx;
+}
+
+static struct dma_async_tx_descriptor *
+ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
+{
+ int i, count, disks = sh->disks;
+ int syndrome_disks = sh->ddf_layout ? disks : disks-2;
+ int d0_idx = raid6_d0(sh);
+ int faila = -1, failb = -1;
+ int target = sh->ops.target;
+ int target2 = sh->ops.target2;
+ struct r5dev *tgt = &sh->dev[target];
+ struct r5dev *tgt2 = &sh->dev[target2];
+ struct dma_async_tx_descriptor *tx;
+ struct page **blocks = percpu->scribble;
+ struct async_submit_ctl submit;
+
+ pr_debug("%s: stripe %llu block1: %d block2: %d\n",
+ __func__, (unsigned long long)sh->sector, target, target2);
+ BUG_ON(target < 0 || target2 < 0);
+ BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
+ BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
+
+ /* we need to open-code set_syndrome_sources to handle the
+ * slot number conversion for 'faila' and 'failb'
+ */
+ for (i = 0; i < disks ; i++)
+ blocks[i] = (void *)raid6_empty_zero_page;
+ count = 0;
+ i = d0_idx;
+ do {
+ int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+ blocks[slot] = sh->dev[i].page;
+
+ if (i == target)
+ faila = slot;
+ if (i == target2)
+ failb = slot;
+ i = raid6_next_disk(i, disks);
+ } while (i != d0_idx);
+ BUG_ON(count != syndrome_disks);
+
+ BUG_ON(faila == failb);
+ if (failb < faila)
+ swap(faila, failb);
+ pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
+ __func__, (unsigned long long)sh->sector, faila, failb);
+
+ atomic_inc(&sh->count);
+
+ if (failb == syndrome_disks+1) {
+ /* Q disk is one of the missing disks */
+ if (faila == syndrome_disks) {
+ /* Missing P+Q, just recompute */
+ init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+ ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
+ return async_gen_syndrome(blocks, 0, count+2,
+ STRIPE_SIZE, &submit);
+ } else {
+ struct page *dest;
+ int data_target;
+ int qd_idx = sh->qd_idx;
+
+ /* Missing D+Q: recompute D from P, then recompute Q */
+ if (target == qd_idx)
+ data_target = target2;
+ else
+ data_target = target;
+
+ count = 0;
+ for (i = disks; i-- ; ) {
+ if (i == data_target || i == qd_idx)
+ continue;
+ blocks[count++] = sh->dev[i].page;
+ }
+ dest = sh->dev[data_target].page;
+ init_async_submit(&submit,
+ ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+ NULL, NULL, NULL,
+ to_addr_conv(sh, percpu));
+ tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
+ &submit);
+
+ count = set_syndrome_sources(blocks, sh);
+ init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+ ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
+ return async_gen_syndrome(blocks, 0, count+2,
+ STRIPE_SIZE, &submit);
+ }
+ } else {
+ init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+ ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
+ if (failb == syndrome_disks) {
+ /* We're missing D+P. */
+ return async_raid6_datap_recov(syndrome_disks+2,
+ STRIPE_SIZE, faila,
+ blocks, &submit);
+ } else {
+ /* We're missing D+D. */
+ return async_raid6_2data_recov(syndrome_disks+2,
+ STRIPE_SIZE, faila, failb,
+ blocks, &submit);
+ }
+ }
+}
+
+
static void ops_complete_prexor(void *stripe_head_ref)
{
struct stripe_head *sh = stripe_head_ref;
@@ -680,12 +907,13 @@ static void ops_complete_prexor(void *stripe_head_ref)
}
static struct dma_async_tx_descriptor *
-ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
+ struct dma_async_tx_descriptor *tx)
{
- /* kernel stack size limits the total number of disks */
int disks = sh->disks;
- struct page *xor_srcs[disks];
+ struct page **xor_srcs = percpu->scribble;
int count = 0, pd_idx = sh->pd_idx, i;
+ struct async_submit_ctl submit;
/* existing parity data subtracted */
struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
@@ -700,9 +928,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
xor_srcs[count++] = dev->page;
}
- tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx,
- ops_complete_prexor, sh);
+ init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ ops_complete_prexor, sh, to_addr_conv(sh, percpu));
+ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
return tx;
}
@@ -742,17 +970,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
return tx;
}
-static void ops_complete_postxor(void *stripe_head_ref)
+static void ops_complete_reconstruct(void *stripe_head_ref)
{
struct stripe_head *sh = stripe_head_ref;
- int disks = sh->disks, i, pd_idx = sh->pd_idx;
+ int disks = sh->disks;
+ int pd_idx = sh->pd_idx;
+ int qd_idx = sh->qd_idx;
+ int i;
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
- if (dev->written || i == pd_idx)
+
+ if (dev->written || i == pd_idx || i == qd_idx)
set_bit(R5_UPTODATE, &dev->flags);
}
@@ -770,12 +1002,12 @@ static void ops_complete_postxor(void *stripe_head_ref)
}
static void
-ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
+ struct dma_async_tx_descriptor *tx)
{
- /* kernel stack size limits the total number of disks */
int disks = sh->disks;
- struct page *xor_srcs[disks];
-
+ struct page **xor_srcs = percpu->scribble;
+ struct async_submit_ctl submit;
int count = 0, pd_idx = sh->pd_idx, i;
struct page *xor_dest;
int prexor = 0;
@@ -809,18 +1041,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
* set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
* for the synchronous xor case
*/
- flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK |
+ flags = ASYNC_TX_ACK |
(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
atomic_inc(&sh->count);
- if (unlikely(count == 1)) {
- flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
- tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
- flags, tx, ops_complete_postxor, sh);
- } else
- tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- flags, tx, ops_complete_postxor, sh);
+ init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
+ to_addr_conv(sh, percpu));
+ if (unlikely(count == 1))
+ tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
+ else
+ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+}
+
+static void
+ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
+ struct dma_async_tx_descriptor *tx)
+{
+ struct async_submit_ctl submit;
+ struct page **blocks = percpu->scribble;
+ int count;
+
+ pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
+
+ count = set_syndrome_sources(blocks, sh);
+
+ atomic_inc(&sh->count);
+
+ init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
+ sh, to_addr_conv(sh, percpu));
+ async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
}
static void ops_complete_check(void *stripe_head_ref)
@@ -835,63 +1085,115 @@ static void ops_complete_check(void *stripe_head_ref)
release_stripe(sh);
}
-static void ops_run_check(struct stripe_head *sh)
+static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
{
- /* kernel stack size limits the total number of disks */
int disks = sh->disks;
- struct page *xor_srcs[disks];
+ int pd_idx = sh->pd_idx;
+ int qd_idx = sh->qd_idx;
+ struct page *xor_dest;
+ struct page **xor_srcs = percpu->scribble;
struct dma_async_tx_descriptor *tx;
-
- int count = 0, pd_idx = sh->pd_idx, i;
- struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
+ struct async_submit_ctl submit;
+ int count;
+ int i;
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
+ count = 0;
+ xor_dest = sh->dev[pd_idx].page;
+ xor_srcs[count++] = xor_dest;
for (i = disks; i--; ) {
- struct r5dev *dev = &sh->dev[i];
- if (i != pd_idx)
- xor_srcs[count++] = dev->page;
+ if (i == pd_idx || i == qd_idx)
+ continue;
+ xor_srcs[count++] = sh->dev[i].page;
}
- tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- &sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
+ init_async_submit(&submit, 0, NULL, NULL, NULL,
+ to_addr_conv(sh, percpu));
+ tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
+ &sh->ops.zero_sum_result, &submit);
+
+ atomic_inc(&sh->count);
+ init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
+ tx = async_trigger_callback(&submit);
+}
+
+static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
+{
+ struct page **srcs = percpu->scribble;
+ struct async_submit_ctl submit;
+ int count;
+
+ pr_debug("%s: stripe %llu checkp: %d\n", __func__,
+ (unsigned long long)sh->sector, checkp);
+
+ count = set_syndrome_sources(srcs, sh);
+ if (!checkp)
+ srcs[count] = NULL;
atomic_inc(&sh->count);
- tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
- ops_complete_check, sh);
+ init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
+ sh, to_addr_conv(sh, percpu));
+ async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
+ &sh->ops.zero_sum_result, percpu->spare_page, &submit);
}
-static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
+static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
{
int overlap_clear = 0, i, disks = sh->disks;
struct dma_async_tx_descriptor *tx = NULL;
+ raid5_conf_t *conf = sh->raid_conf;
+ int level = conf->level;
+ struct raid5_percpu *percpu;
+ unsigned long cpu;
+ cpu = get_cpu();
+ percpu = per_cpu_ptr(conf->percpu, cpu);
if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
ops_run_biofill(sh);
overlap_clear++;
}
if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
- tx = ops_run_compute5(sh);
- /* terminate the chain if postxor is not set to be run */
- if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
+ if (level < 6)
+ tx = ops_run_compute5(sh, percpu);
+ else {
+ if (sh->ops.target2 < 0 || sh->ops.target < 0)
+ tx = ops_run_compute6_1(sh, percpu);
+ else
+ tx = ops_run_compute6_2(sh, percpu);
+ }
+ /* terminate the chain if reconstruct is not set to be run */
+ if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
async_tx_ack(tx);
}
if (test_bit(STRIPE_OP_PREXOR, &ops_request))
- tx = ops_run_prexor(sh, tx);
+ tx = ops_run_prexor(sh, percpu, tx);
if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
tx = ops_run_biodrain(sh, tx);
overlap_clear++;
}
- if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
- ops_run_postxor(sh, tx);
+ if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
+ if (level < 6)
+ ops_run_reconstruct5(sh, percpu, tx);
+ else
+ ops_run_reconstruct6(sh, percpu, tx);
+ }
- if (test_bit(STRIPE_OP_CHECK, &ops_request))
- ops_run_check(sh);
+ if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
+ if (sh->check_state == check_state_run)
+ ops_run_check_p(sh, percpu);
+ else if (sh->check_state == check_state_run_q)
+ ops_run_check_pq(sh, percpu, 0);
+ else if (sh->check_state == check_state_run_pq)
+ ops_run_check_pq(sh, percpu, 1);
+ else
+ BUG();
+ }
if (overlap_clear)
for (i = disks; i--; ) {
@@ -899,6 +1201,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
if (test_and_clear_bit(R5_Overlap, &dev->flags))
wake_up(&sh->raid_conf->wait_for_overlap);
}
+ put_cpu();
}
static int grow_one_stripe(raid5_conf_t *conf)
@@ -948,6 +1251,28 @@ static int grow_stripes(raid5_conf_t *conf, int num)
return 0;
}
+/**
+ * scribble_len - return the required size of the scribble region
+ * @num: total number of disks in the array
+ *
+ * The size must be enough to contain:
+ * 1/ a struct page pointer for each device in the array +2
+ * 2/ room to convert each entry in (1) to its corresponding dma
+ * (dma_map_page()) or page (page_address()) address.
+ *
+ * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
+ * calculate over all devices (not just the data blocks), using zeros in place
+ * of the P and Q blocks.
+ */
+static size_t scribble_len(int num)
+{
+ size_t len;
+
+ len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
+
+ return len;
+}
+
static int resize_stripes(raid5_conf_t *conf, int newsize)
{
/* Make all the stripes able to hold 'newsize' devices.
@@ -976,6 +1301,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
struct stripe_head *osh, *nsh;
LIST_HEAD(newstripes);
struct disk_info *ndisks;
+ unsigned long cpu;
int err;
struct kmem_cache *sc;
int i;
@@ -1041,7 +1367,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
/* Step 3.
* At this point, we are holding all the stripes so the array
* is completely stalled, so now is a good time to resize
- * conf->disks.
+ * conf->disks and the scribble region
*/
ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
if (ndisks) {
@@ -1052,10 +1378,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
} else
err = -ENOMEM;
+ get_online_cpus();
+ conf->scribble_len = scribble_len(newsize);
+ for_each_present_cpu(cpu) {
+ struct raid5_percpu *percpu;
+ void *scribble;
+
+ percpu = per_cpu_ptr(conf->percpu, cpu);
+ scribble = kmalloc(conf->scribble_len, GFP_NOIO);
+
+ if (scribble) {
+ kfree(percpu->scribble);
+ percpu->scribble = scribble;
+ } else {
+ err = -ENOMEM;
+ break;
+ }
+ }
+ put_online_cpus();
+
/* Step 4, return new stripes to service */
while(!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
list_del_init(&nsh->lru);
+
for (i=conf->raid_disks; i < newsize; i++)
if (nsh->dev[i].page == NULL) {
struct page *p = alloc_page(GFP_NOIO);
@@ -1594,258 +1940,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
}
-
-/*
- * Copy data between a page in the stripe cache, and one or more bion
- * The page could align with the middle of the bio, or there could be
- * several bion, each with several bio_vecs, which cover part of the page
- * Multiple bion are linked together on bi_next. There may be extras
- * at the end of this list. We ignore them.
- */
-static void copy_data(int frombio, struct bio *bio,
- struct page *page,
- sector_t sector)
-{
- char *pa = page_address(page);
- struct bio_vec *bvl;
- int i;
- int page_offset;
-
- if (bio->bi_sector >= sector)
- page_offset = (signed)(bio->bi_sector - sector) * 512;
- else
- page_offset = (signed)(sector - bio->bi_sector) * -512;
- bio_for_each_segment(bvl, bio, i) {
- int len = bio_iovec_idx(bio,i)->bv_len;
- int clen;
- int b_offset = 0;
-
- if (page_offset < 0) {
- b_offset = -page_offset;
- page_offset += b_offset;
- len -= b_offset;
- }
-
- if (len > 0 && page_offset + len > STRIPE_SIZE)
- clen = STRIPE_SIZE - page_offset;
- else clen = len;
-
- if (clen > 0) {
- char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
- if (frombio)
- memcpy(pa+page_offset, ba+b_offset, clen);
- else
- memcpy(ba+b_offset, pa+page_offset, clen);
- __bio_kunmap_atomic(ba, KM_USER0);
- }
- if (clen < len) /* hit end of page */
- break;
- page_offset += len;
- }
-}
-
-#define check_xor() do { \
- if (count == MAX_XOR_BLOCKS) { \
- xor_blocks(count, STRIPE_SIZE, dest, ptr);\
- count = 0; \
- } \
- } while(0)
-
-static void compute_parity6(struct stripe_head *sh, int method)
-{
- raid5_conf_t *conf = sh->raid_conf;
- int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
- int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
- struct bio *chosen;
- /**** FIX THIS: This could be very bad if disks is close to 256 ****/
- void *ptrs[syndrome_disks+2];
-
- pd_idx = sh->pd_idx;
- qd_idx = sh->qd_idx;
- d0_idx = raid6_d0(sh);
-
- pr_debug("compute_parity, stripe %llu, method %d\n",
- (unsigned long long)sh->sector, method);
-
- switch(method) {
- case READ_MODIFY_WRITE:
- BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */
- case RECONSTRUCT_WRITE:
- for (i= disks; i-- ;)
- if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
- chosen = sh->dev[i].towrite;
- sh->dev[i].towrite = NULL;
-
- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
- wake_up(&conf->wait_for_overlap);
-
- BUG_ON(sh->dev[i].written);
- sh->dev[i].written = chosen;
- }
- break;
- case CHECK_PARITY:
- BUG(); /* Not implemented yet */
- }
-
- for (i = disks; i--;)
- if (sh->dev[i].written) {
- sector_t sector = sh->dev[i].sector;
- struct bio *wbi = sh->dev[i].written;
- while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
- copy_data(1, wbi, sh->dev[i].page, sector);
- wbi = r5_next_bio(wbi, sector);
- }
-
- set_bit(R5_LOCKED, &sh->dev[i].flags);
- set_bit(R5_UPTODATE, &sh->dev[i].flags);
- }
-
- /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
-
- for (i = 0; i < disks; i++)
- ptrs[i] = (void *)raid6_empty_zero_page;
-
- count = 0;
- i = d0_idx;
- do {
- int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
-
- ptrs[slot] = page_address(sh->dev[i].page);
- if (slot < syndrome_disks &&
- !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
- printk(KERN_ERR "block %d/%d not uptodate "
- "on parity calc\n", i, count);
- BUG();
- }
-
- i = raid6_next_disk(i, disks);
- } while (i != d0_idx);
- BUG_ON(count != syndrome_disks);
-
- raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
-
- switch(method) {
- case RECONSTRUCT_WRITE:
- set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
- set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
- set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
- set_bit(R5_LOCKED, &sh->dev[qd_idx].flags);
- break;
- case UPDATE_PARITY:
- set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
- set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
- break;
- }
-}
-
-
-/* Compute one missing block */
-static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
-{
- int i, count, disks = sh->disks;
- void *ptr[MAX_XOR_BLOCKS], *dest, *p;
- int qd_idx = sh->qd_idx;
-
- pr_debug("compute_block_1, stripe %llu, idx %d\n",
- (unsigned long long)sh->sector, dd_idx);
-
- if ( dd_idx == qd_idx ) {
- /* We're actually computing the Q drive */
- compute_parity6(sh, UPDATE_PARITY);
- } else {
- dest = page_address(sh->dev[dd_idx].page);
- if (!nozero) memset(dest, 0, STRIPE_SIZE);
- count = 0;
- for (i = disks ; i--; ) {
- if (i == dd_idx || i == qd_idx)
- continue;
- p = page_address(sh->dev[i].page);
- if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
- ptr[count++] = p;
- else
- printk("compute_block() %d, stripe %llu, %d"
- " not present\n", dd_idx,
- (unsigned long long)sh->sector, i);
-
- check_xor();
- }
- if (count)
- xor_blocks(count, STRIPE_SIZE, dest, ptr);
- if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
- else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
- }
-}
-
-/* Compute two missing blocks */
-static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
-{
- int i, count, disks = sh->disks;
- int syndrome_disks = sh->ddf_layout ? disks : disks-2;
- int d0_idx = raid6_d0(sh);
- int faila = -1, failb = -1;
- /**** FIX THIS: This could be very bad if disks is close to 256 ****/
- void *ptrs[syndrome_disks+2];
-
- for (i = 0; i < disks ; i++)
- ptrs[i] = (void *)raid6_empty_zero_page;
- count = 0;
- i = d0_idx;
- do {
- int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
-
- ptrs[slot] = page_address(sh->dev[i].page);
-
- if (i == dd_idx1)
- faila = slot;
- if (i == dd_idx2)
- failb = slot;
- i = raid6_next_disk(i, disks);
- } while (i != d0_idx);
- BUG_ON(count != syndrome_disks);
-
- BUG_ON(faila == failb);
- if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
-
- pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
- (unsigned long long)sh->sector, dd_idx1, dd_idx2,
- faila, failb);
-
- if (failb == syndrome_disks+1) {
- /* Q disk is one of the missing disks */
- if (faila == syndrome_disks) {
- /* Missing P+Q, just recompute */
- compute_parity6(sh, UPDATE_PARITY);
- return;
- } else {
- /* We're missing D+Q; recompute D from P */
- compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
- dd_idx2 : dd_idx1),
- 0);
- compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
- return;
- }
- }
-
- /* We're missing D+P or D+D; */
- if (failb == syndrome_disks) {
- /* We're missing D+P. */
- raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
- } else {
- /* We're missing D+D. */
- raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
- ptrs);
- }
-
- /* Both the above update both missing blocks */
- set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
- set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
-}
-
static void
-schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
+schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
int rcw, int expand)
{
int i, pd_idx = sh->pd_idx, disks = sh->disks;
+ raid5_conf_t *conf = sh->raid_conf;
+ int level = conf->level;
if (rcw) {
/* if we are not expanding this is a proper write request, and
@@ -1858,7 +1959,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
} else
sh->reconstruct_state = reconstruct_state_run;
- set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+ set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
@@ -1871,17 +1972,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
s->locked++;
}
}
- if (s->locked + 1 == disks)
+ if (s->locked + conf->max_degraded == disks)
if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
- atomic_inc(&sh->raid_conf->pending_full_writes);
+ atomic_inc(&conf->pending_full_writes);
} else {
+ BUG_ON(level == 6);
BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
sh->reconstruct_state = reconstruct_state_prexor_drain_run;
set_bit(STRIPE_OP_PREXOR, &s->ops_request);
set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
- set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+ set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
@@ -1899,13 +2001,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
}
}
- /* keep the parity disk locked while asynchronous operations
+ /* keep the parity disk(s) locked while asynchronous operations
* are in flight
*/
set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
s->locked++;
+ if (level == 6) {
+ int qd_idx = sh->qd_idx;
+ struct r5dev *dev = &sh->dev[qd_idx];
+
+ set_bit(R5_LOCKED, &dev->flags);
+ clear_bit(R5_UPTODATE, &dev->flags);
+ s->locked++;
+ }
+
pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
__func__, (unsigned long long)sh->sector,
s->locked, s->ops_request);
@@ -1986,13 +2097,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
static void end_reshape(raid5_conf_t *conf);
-static int page_is_zero(struct page *p)
-{
- char *a = page_address(p);
- return ((*(u32*)a) == 0 &&
- memcmp(a, a+4, STRIPE_SIZE-4)==0);
-}
-
static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
struct stripe_head *sh)
{
@@ -2132,9 +2236,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
set_bit(R5_Wantcompute, &dev->flags);
sh->ops.target = disk_idx;
+ sh->ops.target2 = -1;
s->req_compute = 1;
/* Careful: from this point on 'uptodate' is in the eye
- * of raid5_run_ops which services 'compute' operations
+ * of raid_run_ops which services 'compute' operations
* before writes. R5_Wantcompute flags a block that will
* be R5_UPTODATE by the time it is needed for a
* subsequent operation.
@@ -2173,61 +2278,104 @@ static void handle_stripe_fill5(struct stripe_head *sh,
set_bit(STRIPE_HANDLE, &sh->state);
}
-static void handle_stripe_fill6(struct stripe_head *sh,
- struct stripe_head_state *s, struct r6_state *r6s,
- int disks)
+/* fetch_block6 - checks the given member device to see if its data needs
+ * to be read or computed to satisfy a request.
+ *
+ * Returns 1 when no more member devices need to be checked, otherwise returns
+ * 0 to tell the loop in handle_stripe_fill6 to continue
+ */
+static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
+ struct r6_state *r6s, int disk_idx, int disks)
{
- int i;
- for (i = disks; i--; ) {
- struct r5dev *dev = &sh->dev[i];
- if (!test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags) &&
- (dev->toread || (dev->towrite &&
- !test_bit(R5_OVERWRITE, &dev->flags)) ||
- s->syncing || s->expanding ||
- (s->failed >= 1 &&
- (sh->dev[r6s->failed_num[0]].toread ||
- s->to_write)) ||
- (s->failed >= 2 &&
- (sh->dev[r6s->failed_num[1]].toread ||
- s->to_write)))) {
- /* we would like to get this block, possibly
- * by computing it, but we might not be able to
+ struct r5dev *dev = &sh->dev[disk_idx];
+ struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
+ &sh->dev[r6s->failed_num[1]] };
+
+ if (!test_bit(R5_LOCKED, &dev->flags) &&
+ !test_bit(R5_UPTODATE, &dev->flags) &&
+ (dev->toread ||
+ (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+ s->syncing || s->expanding ||
+ (s->failed >= 1 &&
+ (fdev[0]->toread || s->to_write)) ||
+ (s->failed >= 2 &&
+ (fdev[1]->toread || s->to_write)))) {
+ /* we would like to get this block, possibly by computing it,
+ * otherwise read it if the backing disk is insync
+ */
+ BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
+ BUG_ON(test_bit(R5_Wantread, &dev->flags));
+ if ((s->uptodate == disks - 1) &&
+ (s->failed && (disk_idx == r6s->failed_num[0] ||
+ disk_idx == r6s->failed_num[1]))) {
+ /* this disk has failed and we have been asked to fetch it,
+ * so compute it
*/
- if ((s->uptodate == disks - 1) &&
- (s->failed && (i == r6s->failed_num[0] ||
- i == r6s->failed_num[1]))) {
- pr_debug("Computing stripe %llu block %d\n",
- (unsigned long long)sh->sector, i);
- compute_block_1(sh, i, 0);
- s->uptodate++;
- } else if ( s->uptodate == disks-2 && s->failed >= 2 ) {
- /* Computing 2-failure is *very* expensive; only
- * do it if failed >= 2
- */
- int other;
- for (other = disks; other--; ) {
- if (other == i)
- continue;
- if (!test_bit(R5_UPTODATE,
- &sh->dev[other].flags))
- break;
- }
- BUG_ON(other < 0);
- pr_debug("Computing stripe %llu blocks %d,%d\n",
- (unsigned long long)sh->sector,
- i, other);
- compute_block_2(sh, i, other);
- s->uptodate += 2;
- } else if (test_bit(R5_Insync, &dev->flags)) {
- set_bit(R5_LOCKED, &dev->flags);
- set_bit(R5_Wantread, &dev->flags);
- s->locked++;
- pr_debug("Reading block %d (sync=%d)\n",
- i, s->syncing);
+ pr_debug("Computing stripe %llu block %d\n",
+ (unsigned long long)sh->sector, disk_idx);
+ set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+ set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+ set_bit(R5_Wantcompute, &dev->flags);
+ sh->ops.target = disk_idx;
+ sh->ops.target2 = -1; /* no 2nd target */
+ s->req_compute = 1;
+ s->uptodate++;
+ return 1;
+ } else if (s->uptodate == disks-2 && s->failed >= 2) {
+ /* Computing 2-failure is *very* expensive; only
+ * do it if failed >= 2
+ */
+ int other;
+ for (other = disks; other--; ) {
+ if (other == disk_idx)
+ continue;
+ if (!test_bit(R5_UPTODATE,
+ &sh->dev[other].flags))
+ break;
}
+ BUG_ON(other < 0);
+ pr_debug("Computing stripe %llu blocks %d,%d\n",
+ (unsigned long long)sh->sector,
+ disk_idx, other);
+ set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+ set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+ set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
+ set_bit(R5_Wantcompute, &sh->dev[other].flags);
+ sh->ops.target = disk_idx;
+ sh->ops.target2 = other;
+ s->uptodate += 2;
+ s->req_compute = 1;
+ return 1;
+ } else if (test_bit(R5_Insync, &dev->flags)) {
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantread, &dev->flags);
+ s->locked++;
+ pr_debug("Reading block %d (sync=%d)\n",
+ disk_idx, s->syncing);
}
}
+
+ return 0;
+}
+
+/**
+ * handle_stripe_fill6 - read or compute data to satisfy pending requests.
+ */
+static void handle_stripe_fill6(struct stripe_head *sh,
+ struct stripe_head_state *s, struct r6_state *r6s,
+ int disks)
+{
+ int i;
+
+ /* look for blocks to read/compute, skip this if a compute
+ * is already in flight, or if the stripe contents are in the
+ * midst of changing due to a write
+ */
+ if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
+ !sh->reconstruct_state)
+ for (i = disks; i--; )
+ if (fetch_block6(sh, s, r6s, i, disks))
+ break;
set_bit(STRIPE_HANDLE, &sh->state);
}
@@ -2361,114 +2509,61 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf,
*/
/* since handle_stripe can be called at any time we need to handle the
* case where a compute block operation has been submitted and then a
- * subsequent call wants to start a write request. raid5_run_ops only
- * handles the case where compute block and postxor are requested
+ * subsequent call wants to start a write request. raid_run_ops only
+ * handles the case where compute block and reconstruct are requested
* simultaneously. If this is not the case then new writes need to be
* held off until the compute completes.
*/
if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
(s->locked == 0 && (rcw == 0 || rmw == 0) &&
!test_bit(STRIPE_BIT_DELAY, &sh->state)))
- schedule_reconstruction5(sh, s, rcw == 0, 0);
+ schedule_reconstruction(sh, s, rcw == 0, 0);
}
static void handle_stripe_dirtying6(raid5_conf_t *conf,
struct stripe_head *sh, struct stripe_head_state *s,
struct r6_state *r6s, int disks)
{
- int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
+ int rcw = 0, pd_idx = sh->pd_idx, i;
int qd_idx = sh->qd_idx;
+
+ set_bit(STRIPE_HANDLE, &sh->state);
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
- /* Would I have to read this buffer for reconstruct_write */
- if (!test_bit(R5_OVERWRITE, &dev->flags)
- && i != pd_idx && i != qd_idx
- && (!test_bit(R5_LOCKED, &dev->flags)
- ) &&
- !test_bit(R5_UPTODATE, &dev->flags)) {
- if (test_bit(R5_Insync, &dev->flags)) rcw++;
- else {
- pr_debug("raid6: must_compute: "
- "disk %d flags=%#lx\n", i, dev->flags);
- must_compute++;
+ /* check if we haven't enough data */
+ if (!test_bit(R5_OVERWRITE, &dev->flags) &&
+ i != pd_idx && i != qd_idx &&
+ !test_bit(R5_LOCKED, &dev->flags) &&
+ !(test_bit(R5_UPTODATE, &dev->flags) ||
+ test_bit(R5_Wantcompute, &dev->flags))) {
+ rcw++;
+ if (!test_bit(R5_Insync, &dev->flags))
+ continue; /* it's a failed drive */
+
+ if (
+ test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ pr_debug("Read_old stripe %llu "
+ "block %d for Reconstruct\n",
+ (unsigned long long)sh->sector, i);
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantread, &dev->flags);
+ s->locked++;
+ } else {
+ pr_debug("Request delayed stripe %llu "
+ "block %d for Reconstruct\n",
+ (unsigned long long)sh->sector, i);
+ set_bit(STRIPE_DELAYED, &sh->state);
+ set_bit(STRIPE_HANDLE, &sh->state);
}
}
}
- pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
- (unsigned long long)sh->sector, rcw, must_compute);
- set_bit(STRIPE_HANDLE, &sh->state);
-
- if (rcw > 0)
- /* want reconstruct write, but need to get some data */
- for (i = disks; i--; ) {
- struct r5dev *dev = &sh->dev[i];
- if (!test_bit(R5_OVERWRITE, &dev->flags)
- && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
- && !test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags) &&
- test_bit(R5_Insync, &dev->flags)) {
- if (
- test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- pr_debug("Read_old stripe %llu "
- "block %d for Reconstruct\n",
- (unsigned long long)sh->sector, i);
- set_bit(R5_LOCKED, &dev->flags);
- set_bit(R5_Wantread, &dev->flags);
- s->locked++;
- } else {
- pr_debug("Request delayed stripe %llu "
- "block %d for Reconstruct\n",
- (unsigned long long)sh->sector, i);
- set_bit(STRIPE_DELAYED, &sh->state);
- set_bit(STRIPE_HANDLE, &sh->state);
- }
- }
- }
/* now if nothing is locked, and if we have enough data, we can start a
* write request
*/
- if (s->locked == 0 && rcw == 0 &&
+ if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
+ s->locked == 0 && rcw == 0 &&
!test_bit(STRIPE_BIT_DELAY, &sh->state)) {
- if (must_compute > 0) {
- /* We have failed blocks and need to compute them */
- switch (s->failed) {
- case 0:
- BUG();
- case 1:
- compute_block_1(sh, r6s->failed_num[0], 0);
- break;
- case 2:
- compute_block_2(sh, r6s->failed_num[0],
- r6s->failed_num[1]);
- break;
- default: /* This request should have been failed? */
- BUG();
- }
- }
-
- pr_debug("Computing parity for stripe %llu\n",
- (unsigned long long)sh->sector);
- compute_parity6(sh, RECONSTRUCT_WRITE);
- /* now every locked buffer is ready to be written */
- for (i = disks; i--; )
- if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
- pr_debug("Writing stripe %llu block %d\n",
- (unsigned long long)sh->sector, i);
- s->locked++;
- set_bit(R5_Wantwrite, &sh->dev[i].flags);
- }
- if (s->locked == disks)
- if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
- atomic_inc(&conf->pending_full_writes);
- /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
- set_bit(STRIPE_INSYNC, &sh->state);
-
- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- atomic_dec(&conf->preread_active_stripes);
- if (atomic_read(&conf->preread_active_stripes) <
- IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
- }
+ schedule_reconstruction(sh, s, 1, 0);
}
}
@@ -2527,7 +2622,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
* we are done. Otherwise update the mismatch count and repair
* parity if !MD_RECOVERY_CHECK
*/
- if (sh->ops.zero_sum_result == 0)
+ if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
/* parity is correct (on disc,
* not in buffer any more)
*/
@@ -2544,6 +2639,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
set_bit(R5_Wantcompute,
&sh->dev[sh->pd_idx].flags);
sh->ops.target = sh->pd_idx;
+ sh->ops.target2 = -1;
s->uptodate++;
}
}
@@ -2560,67 +2656,74 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
- struct stripe_head_state *s,
- struct r6_state *r6s, struct page *tmp_page,
- int disks)
+ struct stripe_head_state *s,
+ struct r6_state *r6s, int disks)
{
- int update_p = 0, update_q = 0;
- struct r5dev *dev;
int pd_idx = sh->pd_idx;
int qd_idx = sh->qd_idx;
+ struct r5dev *dev;
set_bit(STRIPE_HANDLE, &sh->state);
BUG_ON(s->failed > 2);
- BUG_ON(s->uptodate < disks);
+
/* Want to check and possibly repair P and Q.
* However there could be one 'failed' device, in which
* case we can only check one of them, possibly using the
* other to generate missing data
*/
- /* If !tmp_page, we cannot do the calculations,
- * but as we have set STRIPE_HANDLE, we will soon be called
- * by stripe_handle with a tmp_page - just wait until then.
- */
- if (tmp_page) {
+ switch (sh->check_state) {
+ case check_state_idle:
+ /* start a new check operation if there are < 2 failures */
if (s->failed == r6s->q_failed) {
- /* The only possible failed device holds 'Q', so it
+ /* The only possible failed device holds Q, so it
* makes sense to check P (If anything else were failed,
* we would have used P to recreate it).
*/
- compute_block_1(sh, pd_idx, 1);
- if (!page_is_zero(sh->dev[pd_idx].page)) {
- compute_block_1(sh, pd_idx, 0);
- update_p = 1;
- }
+ sh->check_state = check_state_run;
}
if (!r6s->q_failed && s->failed < 2) {
- /* q is not failed, and we didn't use it to generate
+ /* Q is not failed, and we didn't use it to generate
* anything, so it makes sense to check it
*/
- memcpy(page_address(tmp_page),
- page_address(sh->dev[qd_idx].page),
- STRIPE_SIZE);
- compute_parity6(sh, UPDATE_PARITY);
- if (memcmp(page_address(tmp_page),
- page_address(sh->dev[qd_idx].page),
- STRIPE_SIZE) != 0) {
- clear_bit(STRIPE_INSYNC, &sh->state);
- update_q = 1;
- }
+ if (sh->check_state == check_state_run)
+ sh->check_state = check_state_run_pq;
+ else
+ sh->check_state = check_state_run_q;
}
- if (update_p || update_q) {
- conf->mddev->resync_mismatches += STRIPE_SECTORS;
- if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
- /* don't try to repair!! */
- update_p = update_q = 0;
+
+ /* discard potentially stale zero_sum_result */
+ sh->ops.zero_sum_result = 0;
+
+ if (sh->check_state == check_state_run) {
+ /* async_xor_zero_sum destroys the contents of P */
+ clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+ s->uptodate--;
+ }
+ if (sh->check_state >= check_state_run &&
+ sh->check_state <= check_state_run_pq) {
+ /* async_syndrome_zero_sum preserves P and Q, so
+ * no need to mark them !uptodate here
+ */
+ set_bit(STRIPE_OP_CHECK, &s->ops_request);
+ break;
}
+ /* we have 2-disk failure */
+ BUG_ON(s->failed != 2);
+ /* fall through */
+ case check_state_compute_result:
+ sh->check_state = check_state_idle;
+
+ /* check that a write has not made the stripe insync */
+ if (test_bit(STRIPE_INSYNC, &sh->state))
+ break;
+
/* now write out any block on a failed drive,
- * or P or Q if they need it
+ * or P or Q if they were recomputed
*/
-
+ BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
if (s->failed == 2) {
dev = &sh->dev[r6s->failed_num[1]];
s->locked++;
@@ -2633,14 +2736,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
}
-
- if (update_p) {
+ if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
dev = &sh->dev[pd_idx];
s->locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
}
- if (update_q) {
+ if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
dev = &sh->dev[qd_idx];
s->locked++;
set_bit(R5_LOCKED, &dev->flags);
@@ -2649,6 +2751,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
clear_bit(STRIPE_DEGRADED, &sh->state);
set_bit(STRIPE_INSYNC, &sh->state);
+ break;
+ case check_state_run:
+ case check_state_run_q:
+ case check_state_run_pq:
+ break; /* we will be called again upon completion */
+ case check_state_check_result:
+ sh->check_state = check_state_idle;
+
+ /* handle a successful check operation, if parity is correct
+ * we are done. Otherwise update the mismatch count and repair
+ * parity if !MD_RECOVERY_CHECK
+ */
+ if (sh->ops.zero_sum_result == 0) {
+ /* both parities are correct */
+ if (!s->failed)
+ set_bit(STRIPE_INSYNC, &sh->state);
+ else {
+ /* in contrast to the raid5 case we can validate
+ * parity, but still have a failure to write
+ * back
+ */
+ sh->check_state = check_state_compute_result;
+ /* Returning at this point means that we may go
+ * off and bring p and/or q uptodate again so
+ * we make sure to check zero_sum_result again
+ * to verify if p or q need writeback
+ */
+ }
+ } else {
+ conf->mddev->resync_mismatches += STRIPE_SECTORS;
+ if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+ /* don't try to repair!! */
+ set_bit(STRIPE_INSYNC, &sh->state);
+ else {
+ int *target = &sh->ops.target;
+
+ sh->ops.target = -1;
+ sh->ops.target2 = -1;
+ sh->check_state = check_state_compute_run;
+ set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+ set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+ if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+ set_bit(R5_Wantcompute,
+ &sh->dev[pd_idx].flags);
+ *target = pd_idx;
+ target = &sh->ops.target2;
+ s->uptodate++;
+ }
+ if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+ set_bit(R5_Wantcompute,
+ &sh->dev[qd_idx].flags);
+ *target = qd_idx;
+ s->uptodate++;
+ }
+ }
+ }
+ break;
+ case check_state_compute_run:
+ break;
+ default:
+ printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
+ __func__, sh->check_state,
+ (unsigned long long) sh->sector);
+ BUG();
}
}
@@ -2666,6 +2832,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
if (i != sh->pd_idx && i != sh->qd_idx) {
int dd_idx, j;
struct stripe_head *sh2;
+ struct async_submit_ctl submit;
sector_t bn = compute_blocknr(sh, i, 1);
sector_t s = raid5_compute_sector(conf, bn, 0,
@@ -2685,9 +2852,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
}
/* place all the copies on one channel */
+ init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
tx = async_memcpy(sh2->dev[dd_idx].page,
- sh->dev[i].page, 0, 0, STRIPE_SIZE,
- ASYNC_TX_DEP_ACK, tx, NULL, NULL);
+ sh->dev[i].page, 0, 0, STRIPE_SIZE,
+ &submit);
set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
@@ -2756,7 +2924,8 @@ static bool handle_stripe5(struct stripe_head *sh)
rcu_read_lock();
for (i=disks; i--; ) {
mdk_rdev_t *rdev;
- struct r5dev *dev = &sh->dev[i];
+
+ dev = &sh->dev[i];
clear_bit(R5_Insync, &dev->flags);
pr_debug("check %d: state 0x%lx toread %p read %p write %p "
@@ -2973,7 +3142,7 @@ static bool handle_stripe5(struct stripe_head *sh)
/* Need to write out all blocks after computing parity */
sh->disks = conf->raid_disks;
stripe_set_idx(sh->sector, conf, 0, sh);
- schedule_reconstruction5(sh, &s, 1, 1);
+ schedule_reconstruction(sh, &s, 1, 1);
} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes);
@@ -2993,7 +3162,7 @@ static bool handle_stripe5(struct stripe_head *sh)
md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
if (s.ops_request)
- raid5_run_ops(sh, s.ops_request);
+ raid_run_ops(sh, s.ops_request);
ops_run_io(sh, &s);
@@ -3002,7 +3171,7 @@ static bool handle_stripe5(struct stripe_head *sh)
return blocked_rdev == NULL;
}
-static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe6(struct stripe_head *sh)
{
raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks;
@@ -3014,9 +3183,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
mdk_rdev_t *blocked_rdev = NULL;
pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
- "pd_idx=%d, qd_idx=%d\n",
+ "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
(unsigned long long)sh->sector, sh->state,
- atomic_read(&sh->count), pd_idx, qd_idx);
+ atomic_read(&sh->count), pd_idx, qd_idx,
+ sh->check_state, sh->reconstruct_state);
memset(&s, 0, sizeof(s));
spin_lock(&sh->lock);
@@ -3036,35 +3206,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
i, dev->flags, dev->toread, dev->towrite, dev->written);
- /* maybe we can reply to a read */
- if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
- struct bio *rbi, *rbi2;
- pr_debug("Return read for disc %d\n", i);
- spin_lock_irq(&conf->device_lock);
- rbi = dev->toread;
- dev->toread = NULL;
- if (test_and_clear_bit(R5_Overlap, &dev->flags))
- wake_up(&conf->wait_for_overlap);
- spin_unlock_irq(&conf->device_lock);
- while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
- copy_data(0, rbi, dev->page, dev->sector);
- rbi2 = r5_next_bio(rbi, dev->sector);
- spin_lock_irq(&conf->device_lock);
- if (!raid5_dec_bi_phys_segments(rbi)) {
- rbi->bi_next = return_bi;
- return_bi = rbi;
- }
- spin_unlock_irq(&conf->device_lock);
- rbi = rbi2;
- }
- }
+ /* maybe we can reply to a read
+ *
+ * new wantfill requests are only permitted while
+ * ops_complete_biofill is guaranteed to be inactive
+ */
+ if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
+ !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
+ set_bit(R5_Wantfill, &dev->flags);
/* now count some things */
if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
+ if (test_bit(R5_Wantcompute, &dev->flags)) {
+ s.compute++;
+ BUG_ON(s.compute > 2);
+ }
-
- if (dev->toread)
+ if (test_bit(R5_Wantfill, &dev->flags)) {
+ s.to_fill++;
+ } else if (dev->toread)
s.to_read++;
if (dev->towrite) {
s.to_write++;
@@ -3105,6 +3266,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
blocked_rdev = NULL;
}
+ if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
+ set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
+ set_bit(STRIPE_BIOFILL_RUN, &sh->state);
+ }
+
pr_debug("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d,%d\n",
s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3145,19 +3311,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
* or to load a block that is being partially written.
*/
if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
- (s.syncing && (s.uptodate < disks)) || s.expanding)
+ (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
handle_stripe_fill6(sh, &s, &r6s, disks);
- /* now to consider writing and what else, if anything should be read */
- if (s.to_write)
+ /* Now we check to see if any write operations have recently
+ * completed
+ */
+ if (sh->reconstruct_state == reconstruct_state_drain_result) {
+ int qd_idx = sh->qd_idx;
+
+ sh->reconstruct_state = reconstruct_state_idle;
+ /* All the 'written' buffers and the parity blocks are ready to
+ * be written back to disk
+ */
+ BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+ BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
+ for (i = disks; i--; ) {
+ dev = &sh->dev[i];
+ if (test_bit(R5_LOCKED, &dev->flags) &&
+ (i == sh->pd_idx || i == qd_idx ||
+ dev->written)) {
+ pr_debug("Writing block %d\n", i);
+ BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
+ set_bit(R5_Wantwrite, &dev->flags);
+ if (!test_bit(R5_Insync, &dev->flags) ||
+ ((i == sh->pd_idx || i == qd_idx) &&
+ s.failed == 0))
+ set_bit(STRIPE_INSYNC, &sh->state);
+ }
+ }
+ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ atomic_dec(&conf->preread_active_stripes);
+ if (atomic_read(&conf->preread_active_stripes) <
+ IO_THRESHOLD)
+ md_wakeup_thread(conf->mddev->thread);
+ }
+ }
+
+ /* Now to consider new write requests and what else, if anything
+ * should be read. We do not handle new writes when:
+ * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
+ * 2/ A 'check' operation is in flight, as it may clobber the parity
+ * block.
+ */
+ if (s.to_write && !sh->reconstruct_state && !sh->check_state)
handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
/* maybe we need to check and possibly fix the parity for this stripe
* Any reads will already have been scheduled, so we just see if enough
- * data is available
+ * data is available. The parity check is held off while parity
+ * dependent operations are in flight.
*/
- if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
- handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks);
+ if (sh->check_state ||
+ (s.syncing && s.locked == 0 &&
+ !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
+ !test_bit(STRIPE_INSYNC, &sh->state)))
+ handle_parity_checks6(conf, sh, &s, &r6s, disks);
if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
md_done_sync(conf->mddev, STRIPE_SECTORS,1);
@@ -3178,15 +3387,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
set_bit(R5_Wantwrite, &dev->flags);
set_bit(R5_ReWrite, &dev->flags);
set_bit(R5_LOCKED, &dev->flags);
+ s.locked++;
} else {
/* let's read it back */
set_bit(R5_Wantread, &dev->flags);
set_bit(R5_LOCKED, &dev->flags);
+ s.locked++;
}
}
}
- if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+ /* Finish reconstruct operations initiated by the expansion process */
+ if (sh->reconstruct_state == reconstruct_state_result) {
+ sh->reconstruct_state = reconstruct_state_idle;
+ clear_bit(STRIPE_EXPANDING, &sh->state);
+ for (i = conf->raid_disks; i--; ) {
+ set_bit(R5_Wantwrite, &sh->dev[i].flags);
+ set_bit(R5_LOCKED, &sh->dev[i].flags);
+ s.locked++;
+ }
+ }
+
+ if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
+ !sh->reconstruct_state) {
struct stripe_head *sh2
= get_active_stripe(conf, sh->sector, 1, 1, 1);
if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
@@ -3207,14 +3430,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
/* Need to write out all blocks after computing P&Q */
sh->disks = conf->raid_disks;
stripe_set_idx(sh->sector, conf, 0, sh);
- compute_parity6(sh, RECONSTRUCT_WRITE);
- for (i = conf->raid_disks ; i-- ; ) {
- set_bit(R5_LOCKED, &sh->dev[i].flags);
- s.locked++;
- set_bit(R5_Wantwrite, &sh->dev[i].flags);
- }
- clear_bit(STRIPE_EXPANDING, &sh->state);
- } else if (s.expanded) {
+ schedule_reconstruction(sh, &s, 1, 1);
+ } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes);
wake_up(&conf->wait_for_overlap);
@@ -3232,6 +3449,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
if (unlikely(blocked_rdev))
md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+ if (s.ops_request)
+ raid_run_ops(sh, s.ops_request);
+
ops_run_io(sh, &s);
return_io(return_bi);
@@ -3240,16 +3460,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
}
/* returns true if the stripe was handled */
-static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe(struct stripe_head *sh)
{
if (sh->raid_conf->level == 6)
- return handle_stripe6(sh, tmp_page);
+ return handle_stripe6(sh);
else
return handle_stripe5(sh);
}
-
-
static void raid5_activate_delayed(raid5_conf_t *conf)
{
if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
@@ -3331,6 +3549,9 @@ static int raid5_congested(void *data, int bits)
/* No difference between reads and writes. Just check
* how busy the stripe_cache is
*/
+
+ if (mddev_congested(mddev, bits))
+ return 1;
if (conf->inactive_blocked)
return 1;
if (conf->quiesce)
@@ -3880,7 +4101,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
INIT_LIST_HEAD(&stripes);
for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
int j;
- int skipped = 0;
+ int skipped_disk = 0;
sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
set_bit(STRIPE_EXPANDING, &sh->state);
atomic_inc(&conf->reshape_stripes);
@@ -3896,14 +4117,14 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
continue;
s = compute_blocknr(sh, j, 0);
if (s < raid5_size(mddev, 0, 0)) {
- skipped = 1;
+ skipped_disk = 1;
continue;
}
memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
set_bit(R5_Expanded, &sh->dev[j].flags);
set_bit(R5_UPTODATE, &sh->dev[j].flags);
}
- if (!skipped) {
+ if (!skipped_disk) {
set_bit(STRIPE_EXPAND_READY, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
}
@@ -4057,7 +4278,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
spin_unlock(&sh->lock);
/* wait for any blocked device to be handled */
- while(unlikely(!handle_stripe(sh, NULL)))
+ while (unlikely(!handle_stripe(sh)))
;
release_stripe(sh);
@@ -4114,7 +4335,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
return handled;
}
- handle_stripe(sh, NULL);
+ handle_stripe(sh);
release_stripe(sh);
handled++;
}
@@ -4128,6 +4349,36 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
return handled;
}
+#ifdef CONFIG_MULTICORE_RAID456 /* variant: stripes are queued through the async API */
+static void __process_stripe(void *param, async_cookie_t cookie) /* async callback; cookie is not used */
+{
+ struct stripe_head *sh = param; /* param is the stripe passed to async_schedule_domain() below */
+
+ handle_stripe(sh); /* bool result (stripe handled or not) is discarded */
+ release_stripe(sh);
+}
+
+static void process_stripe(struct stripe_head *sh, struct list_head *domain) /* queue sh on domain for __process_stripe() */
+{
+ async_schedule_domain(__process_stripe, sh, domain);
+}
+
+static void synchronize_stripe_processing(struct list_head *domain) /* called by raid5d after its stripe loop */
+{
+ async_synchronize_full_domain(domain); /* NOTE(review): presumably waits for everything queued on domain - confirm against the async API */
+}
+#else /* !CONFIG_MULTICORE_RAID456: stripes are handled inline in the caller */
+static void process_stripe(struct stripe_head *sh, struct list_head *domain) /* domain is not used in this variant */
+{
+ handle_stripe(sh); /* bool result (stripe handled or not) is discarded */
+ release_stripe(sh);
+ cond_resched(); /* only the inline variant reschedules between stripes */
+}
+
+static void synchronize_stripe_processing(struct list_head *domain) /* nothing is deferred in this variant, so nothing to do */
+{
+}
+#endif /* CONFIG_MULTICORE_RAID456 */
/*
@@ -4142,6 +4393,7 @@ static void raid5d(mddev_t *mddev)
struct stripe_head *sh;
raid5_conf_t *conf = mddev->private;
int handled;
+ LIST_HEAD(raid_domain);
pr_debug("+++ raid5d active\n");
@@ -4178,8 +4430,7 @@ static void raid5d(mddev_t *mddev)
spin_unlock_irq(&conf->device_lock);
handled++;
- handle_stripe(sh, conf->spare_page);
- release_stripe(sh);
+ process_stripe(sh, &raid_domain);
spin_lock_irq(&conf->device_lock);
}
@@ -4187,6 +4438,7 @@ static void raid5d(mddev_t *mddev)
spin_unlock_irq(&conf->device_lock);
+ synchronize_stripe_processing(&raid_domain);
async_tx_issue_pending_all();
unplug_slaves(mddev);
@@ -4319,15 +4571,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
return sectors * (raid_disks - conf->max_degraded);
}
+static void raid5_free_percpu(raid5_conf_t *conf) /* free each CPU's spare page and scribble buffer, then conf->percpu */
+{
+ struct raid5_percpu *percpu;
+ unsigned long cpu;
+
+ if (!conf->percpu) /* per-cpu area was never allocated: nothing to release */
+ return;
+
+ get_online_cpus(); /* paired with put_online_cpus() below */
+ for_each_possible_cpu(cpu) { /* walks every possible CPU, a wider set than the present CPUs raid5_alloc_percpu() fills */
+ percpu = per_cpu_ptr(conf->percpu, cpu);
+ safe_put_page(percpu->spare_page); /* NOTE(review): spare_page is only set for level 6, so this relies on safe_put_page() accepting NULL - confirm */
+ kfree(percpu->scribble);
+ }
+#ifdef CONFIG_HOTPLUG_CPU
+ unregister_cpu_notifier(&conf->cpu_notify); /* NOTE(review): raid5_alloc_percpu() skips registration on error - confirm unregistering is harmless then */
+#endif
+ put_online_cpus();
+
+ free_percpu(conf->percpu); /* conf->percpu is not reset; free_conf() frees conf itself shortly after this call */
+}
+
static void free_conf(raid5_conf_t *conf)
{
shrink_stripes(conf);
- safe_put_page(conf->spare_page);
+ raid5_free_percpu(conf);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
kfree(conf);
}
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * raid456_cpu_notify - CPU hotplug callback that keeps conf->percpu populated.
+ * @nfb:    the cpu_notify block embedded in the owning raid5_conf_t
+ * @action: hotplug event; only CPU_UP_PREPARE and CPU_DEAD (and their
+ *          _FROZEN forms) are acted on, everything else is ignored
+ * @hcpu:   number of the affected CPU, carried in a pointer-sized value
+ *
+ * CPU_UP_PREPARE allocates whatever the CPU's slot is still missing: a
+ * scribble buffer of conf->scribble_len bytes and, for level 6 only, a
+ * spare page.  CPU_DEAD frees both and clears the slot.
+ *
+ * Returns NOTIFY_BAD when an allocation fails, NOTIFY_OK otherwise.
+ */
+static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
+			      void *hcpu)
+{
+	raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
+	long cpu = (long)hcpu;
+	struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		/* only fill in what is missing; a slot may already be populated */
+		if (conf->level == 6 && !percpu->spare_page)
+			percpu->spare_page = alloc_page(GFP_KERNEL);
+		if (!percpu->scribble)
+			percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+
+		if (!percpu->scribble ||
+		    (conf->level == 6 && !percpu->spare_page)) {
+			safe_put_page(percpu->spare_page);
+			kfree(percpu->scribble);
+			/*
+			 * Clear the slot, exactly as CPU_DEAD does.  Stale
+			 * pointers would make a later CPU_UP_PREPARE skip the
+			 * allocation and would be freed a second time by
+			 * raid5_free_percpu().
+			 */
+			percpu->spare_page = NULL;
+			percpu->scribble = NULL;
+			pr_err("%s: failed memory allocation for cpu%ld\n",
+			       __func__, cpu);
+			return NOTIFY_BAD;
+		}
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		safe_put_page(percpu->spare_page);
+		kfree(percpu->scribble);
+		percpu->spare_page = NULL;
+		percpu->scribble = NULL;
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+#endif
+
+static int raid5_alloc_percpu(raid5_conf_t *conf) /* returns 0, -ENOMEM, or the error from register_cpu_notifier() */
+{
+ unsigned long cpu;
+ struct page *spare_page;
+ struct raid5_percpu *allcpus;
+ void *scribble;
+ int err;
+
+ allcpus = alloc_percpu(struct raid5_percpu);
+ if (!allcpus)
+ return -ENOMEM;
+ conf->percpu = allcpus; /* stored before the per-cpu buffers exist and left set if a later step fails */
+
+ get_online_cpus(); /* paired with put_online_cpus() below */
+ err = 0;
+ for_each_present_cpu(cpu) { /* only present CPUs are filled here; raid456_cpu_notify() allocates on CPU_UP_PREPARE */
+ if (conf->level == 6) { /* the spare page is allocated for level 6 only */
+ spare_page = alloc_page(GFP_KERNEL);
+ if (!spare_page) {
+ err = -ENOMEM;
+ break; /* nothing allocated so far is freed in this function */
+ }
+ per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+ }
+ scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL); /* same expression setup_conf() stores in conf->scribble_len */
+ if (!scribble) {
+ err = -ENOMEM;
+ break; /* this CPU's spare page, if any, stays in its slot */
+ }
+ per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
+ }
+#ifdef CONFIG_HOTPLUG_CPU
+ conf->cpu_notify.notifier_call = raid456_cpu_notify;
+ conf->cpu_notify.priority = 0;
+ if (err == 0) /* the notifier is registered only when every allocation above succeeded */
+ err = register_cpu_notifier(&conf->cpu_notify);
+#endif
+ put_online_cpus();
+
+ return err;
+}
+
static raid5_conf_t *setup_conf(mddev_t *mddev)
{
raid5_conf_t *conf;
@@ -4369,6 +4724,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
goto abort;
conf->raid_disks = mddev->raid_disks;
+ conf->scribble_len = scribble_len(conf->raid_disks);
if (mddev->reshape_position == MaxSector)
conf->previous_raid_disks = mddev->raid_disks;
else
@@ -4384,11 +4740,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
goto abort;
- if (mddev->new_level == 6) {
- conf->spare_page = alloc_page(GFP_KERNEL);
- if (!conf->spare_page)
- goto abort;
- }
+ conf->level = mddev->new_level;
+ if (raid5_alloc_percpu(conf) != 0)
+ goto abort;
+
spin_lock_init(&conf->device_lock);
init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap);
@@ -4447,7 +4802,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
printk(KERN_INFO "raid5: allocated %dkB for %s\n",
memory, mdname(mddev));
- conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+ conf->thread = md_register_thread(raid5d, mddev, NULL);
if (!conf->thread) {
printk(KERN_ERR
"raid5: couldn't allocate thread for %s\n",
@@ -4613,7 +4968,7 @@ static int run(mddev_t *mddev)
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
- "%s_reshape");
+ "reshape");
}
/* read-ahead size must cover two whole stripes, which is
@@ -5031,7 +5386,7 @@ static int raid5_start_reshape(mddev_t *mddev)
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
- "%s_reshape");
+ "reshape");
if (!mddev->sync_thread) {
mddev->recovery = 0;
spin_lock_irq(&conf->device_lock);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 9459689c4ea..2390e0e83da 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -2,6 +2,7 @@
#define _RAID5_H
#include <linux/raid/xor.h>
+#include <linux/dmaengine.h>
/*
*
@@ -175,7 +176,9 @@
*/
enum check_states {
check_state_idle = 0,
- check_state_run, /* parity check */
+ check_state_run, /* xor parity check */
+ check_state_run_q, /* q-parity check */
+ check_state_run_pq, /* pq dual parity check */
check_state_check_result,
check_state_compute_run, /* parity repair */
check_state_compute_result,
@@ -215,8 +218,8 @@ struct stripe_head {
* @target - STRIPE_OP_COMPUTE_BLK target
*/
struct stripe_operations {
- int target;
- u32 zero_sum_result;
+ int target, target2;
+ enum sum_check_flags zero_sum_result;
} ops;
struct r5dev {
struct bio req;
@@ -298,7 +301,7 @@ struct r6_state {
#define STRIPE_OP_COMPUTE_BLK 1
#define STRIPE_OP_PREXOR 2
#define STRIPE_OP_BIODRAIN 3
-#define STRIPE_OP_POSTXOR 4
+#define STRIPE_OP_RECONSTRUCT 4
#define STRIPE_OP_CHECK 5
/*
@@ -385,8 +388,21 @@ struct raid5_private_data {
* (fresh device added).
* Cleared when a sync completes.
*/
-
- struct page *spare_page; /* Used when checking P/Q in raid6 */
+ /* per cpu variables */
+ struct raid5_percpu {
+ struct page *spare_page; /* Used when checking P/Q in raid6 */
+ void *scribble; /* space for constructing buffer
+ * lists and performing address
+ * conversions
+ */
+ } *percpu;
+ size_t scribble_len; /* size of scribble region must be
+ * associated with conf to handle
+ * cpu hotplug while reshaping
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+ struct notifier_block cpu_notify;
+#endif
/*
* Free stripes pool
diff --git a/drivers/media/dvb/dvb-core/dvbdev.h b/drivers/media/dvb/dvb-core/dvbdev.h
index 895e2efca8a..01fc7048474 100644
--- a/drivers/media/dvb/dvb-core/dvbdev.h
+++ b/drivers/media/dvb/dvb-core/dvbdev.h
@@ -31,10 +31,9 @@
#define DVB_MAJOR 212
#if defined(CONFIG_DVB_MAX_ADAPTERS) && CONFIG_DVB_MAX_ADAPTERS > 0
-#define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS
+ #define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS
#else
-#warning invalid CONFIG_DVB_MAX_ADAPTERS value
-#define DVB_MAX_ADAPTERS 8
+ #define DVB_MAX_ADAPTERS 8
#endif
#define DVB_UNSET (-1)
diff --git a/drivers/media/dvb/dvb-usb/Kconfig b/drivers/media/dvb/dvb-usb/Kconfig
index 0e4b97fba38..9744b069241 100644
--- a/drivers/media/dvb/dvb-usb/Kconfig
+++ b/drivers/media/dvb/dvb-usb/Kconfig
@@ -75,7 +75,7 @@ config DVB_USB_DIB0700
select DVB_DIB3000MC if !DVB_FE_CUSTOMISE
select DVB_S5H1411 if !DVB_FE_CUSTOMISE
select DVB_LGDT3305 if !DVB_FE_CUSTOMISE
- select DVB_TUNER_DIB0070 if !DVB_FE_CUSTOMISE
+ select DVB_TUNER_DIB0070
select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMISE
select MEDIA_TUNER_MT2266 if !MEDIA_TUNER_CUSTOMISE
select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE
diff --git a/drivers/media/video/saa7164/saa7164-api.c b/drivers/media/video/saa7164/saa7164-api.c
index bb6df1b276b..6f094a96ac8 100644
--- a/drivers/media/video/saa7164/saa7164-api.c
+++ b/drivers/media/video/saa7164/saa7164-api.c
@@ -415,7 +415,7 @@ int saa7164_api_enum_subdevs(struct saa7164_dev *dev)
goto out;
}
- if (debug & DBGLVL_API)
+ if (saa_debug & DBGLVL_API)
saa7164_dumphex16(dev, buf, (buflen/16)*16);
saa7164_api_dump_subdevs(dev, buf, buflen);
@@ -480,7 +480,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg,
dprintk(DBGLVL_API, "%s() len = %d bytes\n", __func__, len);
- if (debug & DBGLVL_I2C)
+ if (saa_debug & DBGLVL_I2C)
saa7164_dumphex16(dev, buf, 2 * 16);
ret = saa7164_cmd_send(bus->dev, unitid, GET_CUR,
@@ -488,7 +488,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg,
if (ret != SAA_OK)
printk(KERN_ERR "%s() error, ret(2) = 0x%x\n", __func__, ret);
else {
- if (debug & DBGLVL_I2C)
+ if (saa_debug & DBGLVL_I2C)
saa7164_dumphex16(dev, buf, sizeof(buf));
memcpy(data, (buf + 2 * sizeof(u32) + reglen), datalen);
}
@@ -548,7 +548,7 @@ int saa7164_api_i2c_write(struct saa7164_i2c *bus, u8 addr, u32 datalen,
*((u32 *)(buf + 1 * sizeof(u32))) = datalen - reglen;
memcpy((buf + 2 * sizeof(u32)), data, datalen);
- if (debug & DBGLVL_I2C)
+ if (saa_debug & DBGLVL_I2C)
saa7164_dumphex16(dev, buf, sizeof(buf));
ret = saa7164_cmd_send(bus->dev, unitid, SET_CUR,
diff --git a/drivers/media/video/saa7164/saa7164-cmd.c b/drivers/media/video/saa7164/saa7164-cmd.c
index e097f1a0969..c45966edc0c 100644
--- a/drivers/media/video/saa7164/saa7164-cmd.c
+++ b/drivers/media/video/saa7164/saa7164-cmd.c
@@ -250,7 +250,7 @@ int saa7164_cmd_wait(struct saa7164_dev *dev, u8 seqno)
unsigned long stamp;
int r;
- if (debug >= 4)
+ if (saa_debug >= 4)
saa7164_bus_dump(dev);
dprintk(DBGLVL_CMD, "%s(seqno=%d)\n", __func__, seqno);
diff --git a/drivers/media/video/saa7164/saa7164-core.c b/drivers/media/video/saa7164/saa7164-core.c
index f0dbead188c..709affc3104 100644
--- a/drivers/media/video/saa7164/saa7164-core.c
+++ b/drivers/media/video/saa7164/saa7164-core.c
@@ -45,8 +45,8 @@ MODULE_LICENSE("GPL");
32 bus
*/
-unsigned int debug;
-module_param(debug, int, 0644);
+unsigned int saa_debug;
+module_param_named(debug, saa_debug, int, 0644);
MODULE_PARM_DESC(debug, "enable debug messages");
unsigned int waitsecs = 10;
@@ -653,7 +653,7 @@ static int __devinit saa7164_initdev(struct pci_dev *pci_dev,
printk(KERN_ERR "%s() Unsupported board detected, "
"registering without firmware\n", __func__);
- dprintk(1, "%s() parameter debug = %d\n", __func__, debug);
+ dprintk(1, "%s() parameter debug = %d\n", __func__, saa_debug);
dprintk(1, "%s() parameter waitsecs = %d\n", __func__, waitsecs);
fail_fw:
diff --git a/drivers/media/video/saa7164/saa7164.h b/drivers/media/video/saa7164/saa7164.h
index 6753008a9c9..42660b546f0 100644
--- a/drivers/media/video/saa7164/saa7164.h
+++ b/drivers/media/video/saa7164/saa7164.h
@@ -375,9 +375,9 @@ extern int saa7164_buffer_dealloc(struct saa7164_tsport *port,
/* ----------------------------------------------------------- */
-extern unsigned int debug;
+extern unsigned int saa_debug;
#define dprintk(level, fmt, arg...)\
- do { if (debug & level)\
+ do { if (saa_debug & level)\
printk(KERN_DEBUG "%s: " fmt, dev->name, ## arg);\
} while (0)
diff --git a/drivers/media/video/usbvision/usbvision-core.c b/drivers/media/video/usbvision/usbvision-core.c
index 6ba16abeebd..e0f91e4ab65 100644
--- a/drivers/media/video/usbvision/usbvision-core.c
+++ b/drivers/media/video/usbvision/usbvision-core.c
@@ -28,7 +28,6 @@
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
diff --git a/drivers/media/video/usbvision/usbvision-i2c.c b/drivers/media/video/usbvision/usbvision-i2c.c
index f97fd06d594..c19f51dba2e 100644
--- a/drivers/media/video/usbvision/usbvision-i2c.c
+++ b/drivers/media/video/usbvision/usbvision-i2c.c
@@ -28,7 +28,6 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/slab.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <asm/uaccess.h>
#include <linux/ioport.h>
diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c
index 90d9b5c0e9a..a2a50d608a3 100644
--- a/drivers/media/video/usbvision/usbvision-video.c
+++ b/drivers/media/video/usbvision/usbvision-video.c
@@ -52,7 +52,6 @@
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index a5b448ea4ea..b3bf1c44d74 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -339,9 +339,9 @@ static int h_memstick_read_dev_id(struct memstick_dev *card,
card->id.type = id_reg.type;
card->id.category = id_reg.category;
card->id.class = id_reg.class;
+ dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode);
}
complete(&card->mrq_complete);
- dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode);
return -EAGAIN;
}
}
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index 79689b10f93..766e21e1557 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -937,6 +937,8 @@ static int quicktest1(unsigned long arg)
/* Need 1K cacheline aligned that does not cross page boundary */
p = kmalloc(4096, 0);
+ if (p == NULL)
+ return -ENOMEM;
mq = ALIGNUP(p, 1024);
memset(mes, 0xee, sizeof(mes));
dw = mq;
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index 9cbf95bedce..ccd4408a26c 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -340,10 +340,9 @@ static struct proc_dir_entry *proc_gru __read_mostly;
static int create_proc_file(struct proc_entry *p)
{
- p->entry = create_proc_entry(p->name, p->mode, proc_gru);
+ p->entry = proc_create(p->name, p->mode, proc_gru, p->fops);
if (!p->entry)
return -1;
- p->entry->proc_fops = p->fops;
return 0;
}
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 065fa818be5..fc25586b7ee 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -599,6 +599,7 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
struct scatterlist *sg;
unsigned int i;
enum dma_data_direction direction;
+ unsigned int sglen;
/*
* We don't do DMA on "complex" transfers, i.e. with
@@ -628,11 +629,14 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
else
direction = DMA_TO_DEVICE;
+ sglen = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, direction);
+ if (sglen != data->sg_len)
+ goto unmap_exit;
desc = chan->device->device_prep_slave_sg(chan,
data->sg, data->sg_len, direction,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc)
- return -ENOMEM;
+ goto unmap_exit;
host->dma.data_desc = desc;
desc->callback = atmci_dma_complete;
@@ -643,6 +647,9 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
chan->device->device_issue_pending(chan);
return 0;
+unmap_exit:
+ dma_unmap_sg(&host->pdev->dev, data->sg, sglen, direction);
+ return -ENOMEM;
}
#else /* CONFIG_MMC_ATMELMCI_DMA */
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index e4ec3659759..ecf90f5c97c 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -159,7 +159,7 @@ config MTD_AFS_PARTS
config MTD_OF_PARTS
tristate "Flash partition map based on OF description"
- depends on PPC_OF && MTD_PARTITIONS
+ depends on (MICROBLAZE || PPC_OF) && MTD_PARTITIONS
help
This provides a partition parsing function which derives
the partition map from the children of the flash node,
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 3a9a960644b..841e085ab74 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -74,7 +74,7 @@ config MTD_PHYSMAP_BANKWIDTH
config MTD_PHYSMAP_OF
tristate "Flash device in physical memory map based on OF description"
- depends on PPC_OF && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM)
+ depends on (MICROBLAZE || PPC_OF) && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM)
help
This provides a 'mapping' driver which allows the NOR Flash and
ROM driver code to communicate with chips which are mapped
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 7adff4d0960..b9eeadf01b7 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -813,10 +813,10 @@ static int vortex_suspend(struct pci_dev *pdev, pm_message_t state)
if (netif_running(dev)) {
netif_device_detach(dev);
vortex_down(dev, 1);
+ disable_irq(dev->irq);
}
pci_save_state(pdev);
pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
- free_irq(dev->irq, dev);
pci_disable_device(pdev);
pci_set_power_state(pdev, pci_choose_state(pdev, state));
}
@@ -839,18 +839,12 @@ static int vortex_resume(struct pci_dev *pdev)
return err;
}
pci_set_master(pdev);
- if (request_irq(dev->irq, vp->full_bus_master_rx ?
- &boomerang_interrupt : &vortex_interrupt, IRQF_SHARED, dev->name, dev)) {
- pr_warning("%s: Could not reserve IRQ %d\n", dev->name, dev->irq);
- pci_disable_device(pdev);
- return -EBUSY;
- }
if (netif_running(dev)) {
err = vortex_up(dev);
if (err)
return err;
- else
- netif_device_attach(dev);
+ enable_irq(dev->irq);
+ netif_device_attach(dev);
}
}
return 0;
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 462d9f59c53..83a1922e68e 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -87,7 +87,7 @@
/* These identify the driver base version and may not be removed. */
static char version[] =
-KERN_INFO DRV_NAME ": 10/100 PCI Ethernet driver v" DRV_VERSION " (" DRV_RELDATE ")\n";
+DRV_NAME ": 10/100 PCI Ethernet driver v" DRV_VERSION " (" DRV_RELDATE ")\n";
MODULE_AUTHOR("Jeff Garzik <jgarzik@pobox.com>");
MODULE_DESCRIPTION("RealTek RTL-8139C+ series 10/100 PCI Ethernet driver");
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index ed5741b2e70..2bea67c134f 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -1875,7 +1875,7 @@ config 68360_ENET
config FEC
bool "FEC ethernet controller (of ColdFire and some i.MX CPUs)"
- depends on M523x || M527x || M5272 || M528x || M520x || M532x || MACH_MX27 || ARCH_MX35
+ depends on M523x || M527x || M5272 || M528x || M520x || M532x || MACH_MX27 || ARCH_MX35 || ARCH_MX25
help
Say Y here if you want to use the built-in 10/100 Fast ethernet
controller on some Motorola ColdFire and Freescale i.MX processors.
diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c
index be2c6cfe6e8..1372e9a99f5 100644
--- a/drivers/net/atl1c/atl1c_main.c
+++ b/drivers/net/atl1c/atl1c_main.c
@@ -2296,7 +2296,7 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
u32 ctrl;
u32 mac_ctrl_data;
u32 master_ctrl_data;
- u32 wol_ctrl_data;
+ u32 wol_ctrl_data = 0;
u16 mii_bmsr_data;
u16 save_autoneg_advertised;
u16 mii_intr_status_data;
diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 09007437246..df32c109b7a 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -75,6 +75,13 @@ config CAN_EMS_PCI
CPC-PCIe and CPC-104P cards from EMS Dr. Thomas Wuensche
(http://www.ems-wuensche.de).
+config CAN_EMS_USB
+ tristate "EMS CPC-USB/ARM7 CAN/USB interface"
+ depends on USB && CAN_DEV
+ ---help---
+ This driver is for the one channel CPC-USB/ARM7 CAN/USB interface
+ from EMS Dr. Thomas Wuensche (http://www.ems-wuensche.de).
+
config CAN_KVASER_PCI
tristate "Kvaser PCIcanx and Kvaser PCIcan PCI Cards"
depends on PCI && CAN_SJA1000
@@ -82,6 +89,12 @@ config CAN_KVASER_PCI
This driver is for the the PCIcanx and PCIcan cards (1, 2 or
4 channel) from Kvaser (http://www.kvaser.com).
+config CAN_AT91
+ tristate "Atmel AT91 onchip CAN controller"
+ depends on CAN && CAN_DEV && ARCH_AT91SAM9263
+ ---help---
+ This is a driver for the SoC CAN controller in Atmel's AT91SAM9263.
+
config CAN_DEBUG_DEVICES
bool "CAN devices debugging messages"
depends on CAN
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 523a941b358..0dea62721f2 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -7,6 +7,9 @@ obj-$(CONFIG_CAN_VCAN) += vcan.o
obj-$(CONFIG_CAN_DEV) += can-dev.o
can-dev-y := dev.o
+obj-y += usb/
+
obj-$(CONFIG_CAN_SJA1000) += sja1000/
+obj-$(CONFIG_CAN_AT91) += at91_can.o
ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/sja1000/ems_pci.c b/drivers/net/can/sja1000/ems_pci.c
index 7d84b8ac9c1..fd04789d337 100644
--- a/drivers/net/can/sja1000/ems_pci.c
+++ b/drivers/net/can/sja1000/ems_pci.c
@@ -94,12 +94,14 @@ struct ems_pci_card {
#define EMS_PCI_CDR (CDR_CBP | CDR_CLKOUT_MASK)
#define EMS_PCI_V1_BASE_BAR 1
-#define EMS_PCI_V1_MEM_SIZE 4096
+#define EMS_PCI_V1_CONF_SIZE 4096 /* size of PITA control area */
#define EMS_PCI_V2_BASE_BAR 2
-#define EMS_PCI_V2_MEM_SIZE 128
+#define EMS_PCI_V2_CONF_SIZE 128 /* size of PLX control area */
#define EMS_PCI_CAN_BASE_OFFSET 0x400 /* offset where the controllers starts */
#define EMS_PCI_CAN_CTRL_SIZE 0x200 /* memory size for each controller */
+#define EMS_PCI_BASE_SIZE 4096 /* size of controller area */
+
static struct pci_device_id ems_pci_tbl[] = {
/* CPC-PCI v1 */
{PCI_VENDOR_ID_SIEMENS, 0x2104, PCI_ANY_ID, PCI_ANY_ID,},
@@ -224,7 +226,7 @@ static int __devinit ems_pci_add_card(struct pci_dev *pdev,
struct sja1000_priv *priv;
struct net_device *dev;
struct ems_pci_card *card;
- int max_chan, mem_size, base_bar;
+ int max_chan, conf_size, base_bar;
int err, i;
/* Enabling PCI device */
@@ -251,22 +253,22 @@ static int __devinit ems_pci_add_card(struct pci_dev *pdev,
card->version = 2; /* CPC-PCI v2 */
max_chan = EMS_PCI_V2_MAX_CHAN;
base_bar = EMS_PCI_V2_BASE_BAR;
- mem_size = EMS_PCI_V2_MEM_SIZE;
+ conf_size = EMS_PCI_V2_CONF_SIZE;
} else {
card->version = 1; /* CPC-PCI v1 */
max_chan = EMS_PCI_V1_MAX_CHAN;
base_bar = EMS_PCI_V1_BASE_BAR;
- mem_size = EMS_PCI_V1_MEM_SIZE;
+ conf_size = EMS_PCI_V1_CONF_SIZE;
}
/* Remap configuration space and controller memory area */
- card->conf_addr = pci_iomap(pdev, 0, mem_size);
+ card->conf_addr = pci_iomap(pdev, 0, conf_size);
if (card->conf_addr == NULL) {
err = -ENOMEM;
goto failure_cleanup;
}
- card->base_addr = pci_iomap(pdev, base_bar, mem_size);
+ card->base_addr = pci_iomap(pdev, base_bar, EMS_PCI_BASE_SIZE);
if (card->base_addr == NULL) {
err = -ENOMEM;
goto failure_cleanup;
diff --git a/drivers/net/can/usb/Makefile b/drivers/net/can/usb/Makefile
new file mode 100644
index 00000000000..c3f75ba701b
--- /dev/null
+++ b/drivers/net/can/usb/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Linux Controller Area Network USB drivers.
+#
+
+obj-$(CONFIG_CAN_EMS_USB) += ems_usb.o
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
new file mode 100644
index 00000000000..9012e0abc62
--- /dev/null
+++ b/drivers/net/can/usb/ems_usb.c
@@ -0,0 +1,1155 @@
+/*
+ * CAN driver for EMS Dr. Thomas Wuensche CPC-USB/ARM7
+ *
+ * Copyright (C) 2004-2009 EMS Dr. Thomas Wuensche
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <linux/init.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/usb.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+
+MODULE_AUTHOR("Sebastian Haas <haas@ems-wuensche.com>");
+MODULE_DESCRIPTION("CAN driver for EMS Dr. Thomas Wuensche CAN/USB interfaces");
+MODULE_LICENSE("GPL v2");
+
+/* Control-Values for CPC_Control() Command Subject Selection */
+#define CONTR_CAN_MESSAGE 0x04
+#define CONTR_CAN_STATE 0x0C
+#define CONTR_BUS_ERROR 0x1C
+
+/* Control Command Actions */
+#define CONTR_CONT_OFF 0
+#define CONTR_CONT_ON 1
+#define CONTR_ONCE 2
+
+/* Messages from CPC to PC */
+#define CPC_MSG_TYPE_CAN_FRAME 1 /* CAN data frame */
+#define CPC_MSG_TYPE_RTR_FRAME 8 /* CAN remote frame */
+#define CPC_MSG_TYPE_CAN_PARAMS 12 /* Actual CAN parameters */
+#define CPC_MSG_TYPE_CAN_STATE 14 /* CAN state message */
+#define CPC_MSG_TYPE_EXT_CAN_FRAME 16 /* Extended CAN data frame */
+#define CPC_MSG_TYPE_EXT_RTR_FRAME 17 /* Extended remote frame */
+#define CPC_MSG_TYPE_CONTROL 19 /* change interface behavior */
+#define CPC_MSG_TYPE_CONFIRM 20 /* command processed confirmation */
+#define CPC_MSG_TYPE_OVERRUN 21 /* overrun events */
+#define CPC_MSG_TYPE_CAN_FRAME_ERROR 23 /* detected bus errors */
+#define CPC_MSG_TYPE_ERR_COUNTER 25 /* RX/TX error counter */
+
+/* Messages from the PC to the CPC interface */
+#define CPC_CMD_TYPE_CAN_FRAME 1 /* CAN data frame */
+#define CPC_CMD_TYPE_CONTROL 3 /* control of interface behavior */
+#define CPC_CMD_TYPE_CAN_PARAMS 6 /* set CAN parameters */
+#define CPC_CMD_TYPE_RTR_FRAME 13 /* CAN remote frame */
+#define CPC_CMD_TYPE_CAN_STATE 14 /* CAN state message */
+#define CPC_CMD_TYPE_EXT_CAN_FRAME 15 /* Extended CAN data frame */
+#define CPC_CMD_TYPE_EXT_RTR_FRAME 16 /* Extended CAN remote frame */
+#define CPC_CMD_TYPE_CAN_EXIT 200 /* exit the CAN */
+
+#define CPC_CMD_TYPE_INQ_ERR_COUNTER 25 /* request the CAN error counters */
+#define CPC_CMD_TYPE_CLEAR_MSG_QUEUE 8 /* clear CPC_MSG queue */
+#define CPC_CMD_TYPE_CLEAR_CMD_QUEUE 28 /* clear CPC_CMD queue */
+
+#define CPC_CC_TYPE_SJA1000 2 /* Philips basic CAN controller */
+
+#define CPC_CAN_ECODE_ERRFRAME 0x01 /* Ecode type */
+
+/* Overrun types */
+#define CPC_OVR_EVENT_CAN 0x01
+#define CPC_OVR_EVENT_CANSTATE 0x02
+#define CPC_OVR_EVENT_BUSERROR 0x04
+
+/*
+ * If the CAN controller lost a message we indicate it with the highest bit
+ * set in the count field.
+ */
+#define CPC_OVR_HW 0x80
+
+/* Size of the "struct ems_cpc_msg" without the union */
+#define CPC_MSG_HEADER_LEN 11
+#define CPC_CAN_MSG_MIN_SIZE 5
+
+/* Define these values to match your devices */
+#define USB_CPCUSB_VENDOR_ID 0x12D6
+
+#define USB_CPCUSB_ARM7_PRODUCT_ID 0x0444
+
+/* Mode register NXP LPC2119/SJA1000 CAN Controller */
+#define SJA1000_MOD_NORMAL 0x00
+#define SJA1000_MOD_RM 0x01
+
+/* ECC register NXP LPC2119/SJA1000 CAN Controller */
+#define SJA1000_ECC_SEG 0x1F
+#define SJA1000_ECC_DIR 0x20
+#define SJA1000_ECC_ERR 0x06
+#define SJA1000_ECC_BIT 0x00
+#define SJA1000_ECC_FORM 0x40
+#define SJA1000_ECC_STUFF 0x80
+#define SJA1000_ECC_MASK 0xc0
+
+/* Status register content */
+#define SJA1000_SR_BS 0x80
+#define SJA1000_SR_ES 0x40
+
+#define SJA1000_DEFAULT_OUTPUT_CONTROL 0xDA
+
+/*
+ * The device actually uses a 16MHz clock to generate the CAN clock
+ * but it expects SJA1000 bit settings based on 8MHz (is internally
+ * converted).
+ */
+#define EMS_USB_ARM7_CLOCK 8000000
+
+/*
+ * CAN-Message representation in a CPC_MSG. Message object type is
+ * CPC_MSG_TYPE_CAN_FRAME or CPC_MSG_TYPE_RTR_FRAME or
+ * CPC_MSG_TYPE_EXT_CAN_FRAME or CPC_MSG_TYPE_EXT_RTR_FRAME.
+ */
+struct cpc_can_msg {
+ u32 id; /* CAN identifier; the receive path copies it into can_frame.can_id */
+ u8 length; /* data length; the receive path caps it at 8 for can_dlc */
+ u8 msg[8]; /* payload bytes; copied to can_frame.data for non-RTR frames */
+};
+
+/* Representation of the CAN parameters for the SJA1000 controller */
+struct cpc_sja1000_params {
+ u8 mode;
+ u8 acc_code0;
+ u8 acc_code1;
+ u8 acc_code2;
+ u8 acc_code3;
+ u8 acc_mask0;
+ u8 acc_mask1;
+ u8 acc_mask2;
+ u8 acc_mask3;
+ u8 btr0;
+ u8 btr1;
+ u8 outp_contr;
+};
+
+/* CAN params message representation */
+struct cpc_can_params {
+ u8 cc_type;
+
+ /* Will support M16C CAN controller in the future */
+ union {
+ struct cpc_sja1000_params sja1000;
+ } cc_params;
+};
+
+/* Structure for confirmed message handling */
+struct cpc_confirm {
+ u8 error; /* error code */
+};
+
+/* Structure for overrun conditions */
+struct cpc_overrun {
+ u8 event;
+ u8 count;
+};
+
+/* SJA1000 CAN errors (compatible to NXP LPC2119) */
+struct cpc_sja1000_can_error {
+ u8 ecc;
+ u8 rxerr;
+ u8 txerr;
+};
+
+/* structure for CAN error conditions */
+struct cpc_can_error {
+ u8 ecode;
+
+ struct {
+ u8 cc_type;
+
+ /* Other controllers may also provide error code capture regs */
+ union {
+ struct cpc_sja1000_can_error sja1000;
+ } regs;
+ } cc;
+};
+
+/*
+ * Structure containing RX/TX error counter. This structure is used to request
+ * the values of the CAN controllers TX and RX error counter.
+ */
+struct cpc_can_err_counter {
+ u8 rx;
+ u8 tx;
+};
+
+/*
+ * Main message type used between library and application.
+ *
+ * The struct is packed, so the five fields in front of the union occupy
+ * 1 + 1 + 1 + 4 + 4 = 11 bytes, which is the value of CPC_MSG_HEADER_LEN.
+ * The bulk receive path casts raw buffer bytes to this struct and advances
+ * by CPC_MSG_HEADER_LEN + length to find the next message.
+ */
+struct __attribute__ ((packed)) ems_cpc_msg {
+ u8 type; /* type of message */
+ u8 length; /* length of data within union 'msg' */
+ u8 msgid; /* confirmation handle */
+ u32 ts_sec; /* timestamp in seconds */
+ u32 ts_nsec; /* timestamp in nano seconds */
+
+ /* Payload; which member is valid is selected by 'type' */
+ union {
+ u8 generic[64];
+ struct cpc_can_msg can_msg;
+ struct cpc_can_params can_params;
+ struct cpc_confirm confirmation;
+ struct cpc_overrun overrun;
+ struct cpc_can_error error;
+ struct cpc_can_err_counter err_counter;
+ u8 can_state;
+ } msg;
+};
+
+/*
+ * Table of devices that work with this driver
+ * NOTE: This driver supports only CPC-USB/ARM7 (LPC2119) yet.
+ */
+static struct usb_device_id ems_usb_table[] = {
+ {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_ARM7_PRODUCT_ID)},
+ {} /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(usb, ems_usb_table);
+
+#define RX_BUFFER_SIZE 64
+#define CPC_HEADER_SIZE 4
+#define INTR_IN_BUFFER_SIZE 4
+
+#define MAX_RX_URBS 10
+#define MAX_TX_URBS CAN_ECHO_SKB_MAX
+
+struct ems_usb;
+
+/*
+ * Per-URB transmit bookkeeping, passed as urb->context to the TX completion
+ * handler.
+ */
+struct ems_tx_urb_context {
+ struct ems_usb *dev; /* owning device */
+
+ u32 echo_index; /* echo skb slot; MAX_TX_URBS marks the context as free */
+ u8 dlc; /* added to tx_bytes when the URB completes */
+};
+
+/* Driver private data for one CPC-USB interface */
+struct ems_usb {
+ struct can_priv can; /* must be the first member */
+ int open_time; /* jiffies value stored by ems_usb_open() */
+
+ struct sk_buff *echo_skb[MAX_TX_URBS];
+
+ struct usb_device *udev;
+ struct net_device *netdev;
+
+ atomic_t active_tx_urbs; /* decremented on TX completion, reset on unlink */
+ struct usb_anchor tx_submitted; /* killed in unlink_all_urbs() */
+ struct ems_tx_urb_context tx_contexts[MAX_TX_URBS];
+
+ struct usb_anchor rx_submitted; /* bulk RX URBs submitted by ems_usb_start() */
+
+ struct urb *intr_urb; /* interrupt URB submitted by ems_usb_start() */
+
+ u8 *tx_msg_buffer; /* staging buffer used by ems_usb_command_msg() */
+
+ u8 *intr_in_buffer; /* INTR_IN_BUFFER_SIZE bytes; byte 1 feeds free_slots */
+ unsigned int free_slots; /* remember number of available slots */
+
+ struct ems_cpc_msg active_params; /* active controller parameters */
+};
+
+/*
+ * Completion handler for the interrupt URB.
+ *
+ * On success the second byte of the interrupt buffer is stored as the
+ * number of free slots. Unless the URB was unlinked or shut down, it is
+ * resubmitted so that the next report is received.
+ */
+static void ems_usb_read_interrupt_callback(struct urb *urb)
+{
+	struct ems_usb *priv = urb->context;
+	struct net_device *ndev = priv->netdev;
+	int status = urb->status;
+	int ret;
+
+	if (!netif_device_present(ndev))
+		return;
+
+	/* unlinked or shut down: leave the URB alone */
+	if (status == -ECONNRESET || status == -ENOENT ||
+	    status == -ESHUTDOWN)
+		return;
+
+	if (status == 0)
+		priv->free_slots = priv->intr_in_buffer[1];
+	else
+		dev_info(ndev->dev.parent, "Rx interrupt aborted %d\n",
+			 status);
+
+	ret = usb_submit_urb(urb, GFP_ATOMIC);
+	if (!ret)
+		return;
+
+	if (ret == -ENODEV)
+		netif_device_detach(ndev);
+	else
+		dev_err(ndev->dev.parent,
+			"failed resubmitting intr urb: %d\n", ret);
+}
+
+/*
+ * Convert a received CPC CAN message into a can_frame and pass it to the
+ * network stack.
+ *
+ * @dev: interface the message was received on
+ * @msg: CPC message of type CPC_MSG_TYPE_{,EXT_}{CAN,RTR}_FRAME
+ *
+ * The frame is dropped silently if no skb can be allocated.
+ */
+static void ems_usb_rx_can_msg(struct ems_usb *dev, struct ems_cpc_msg *msg)
+{
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	int i;
+	struct net_device_stats *stats = &dev->netdev->stats;
+
+	skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame));
+	if (skb == NULL)
+		return;
+
+	skb->protocol = htons(ETH_P_CAN);
+
+	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+
+	/*
+	 * Zero the whole frame, as ems_usb_rx_err() does. RTR frames and
+	 * frames shorter than 8 bytes would otherwise carry uninitialised
+	 * skb memory in the unused data bytes and padding.
+	 */
+	memset(cf, 0, sizeof(struct can_frame));
+
+	cf->can_id = msg->msg.can_msg.id;
+	cf->can_dlc = min_t(u8, msg->msg.can_msg.length, 8);
+
+	if (msg->type == CPC_MSG_TYPE_EXT_CAN_FRAME
+	    || msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME)
+		cf->can_id |= CAN_EFF_FLAG;
+
+	if (msg->type == CPC_MSG_TYPE_RTR_FRAME
+	    || msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME) {
+		cf->can_id |= CAN_RTR_FLAG;
+	} else {
+		for (i = 0; i < cf->can_dlc; i++)
+			cf->data[i] = msg->msg.can_msg.msg[i];
+	}
+
+	/*
+	 * Update the statistics before netif_rx(): once the skb has been
+	 * handed over it may be freed, so cf must not be touched afterwards.
+	 */
+	stats->rx_packets++;
+	stats->rx_bytes += cf->can_dlc;
+
+	netif_rx(skb);
+}
+
+/*
+ * Build a CAN error frame from a CPC state, bus-error or overrun message
+ * and pass it to the network stack.
+ *
+ * @dev: interface the message was received on
+ * @msg: CPC message of type CPC_MSG_TYPE_CAN_STATE,
+ *       CPC_MSG_TYPE_CAN_FRAME_ERROR or CPC_MSG_TYPE_OVERRUN
+ *
+ * Nothing is reported if no skb can be allocated.
+ */
+static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg)
+{
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	struct net_device_stats *stats = &dev->netdev->stats;
+
+	skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame));
+	if (skb == NULL)
+		return;
+
+	skb->protocol = htons(ETH_P_CAN);
+
+	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+	memset(cf, 0, sizeof(struct can_frame));
+
+	cf->can_id = CAN_ERR_FLAG;
+	cf->can_dlc = CAN_ERR_DLC;
+
+	if (msg->type == CPC_MSG_TYPE_CAN_STATE) {
+		u8 state = msg->msg.can_state;
+
+		if (state & SJA1000_SR_BS) {
+			dev->can.state = CAN_STATE_BUS_OFF;
+			cf->can_id |= CAN_ERR_BUSOFF;
+
+			can_bus_off(dev->netdev);
+		} else if (state & SJA1000_SR_ES) {
+			dev->can.state = CAN_STATE_ERROR_WARNING;
+			dev->can.can_stats.error_warning++;
+		} else {
+			/*
+			 * NOTE(review): this branch sets ERROR_ACTIVE but
+			 * counts an error_passive event - confirm that this
+			 * is intended.
+			 */
+			dev->can.state = CAN_STATE_ERROR_ACTIVE;
+			dev->can.can_stats.error_passive++;
+		}
+	} else if (msg->type == CPC_MSG_TYPE_CAN_FRAME_ERROR) {
+		u8 ecc = msg->msg.error.cc.regs.sja1000.ecc;
+		u8 txerr = msg->msg.error.cc.regs.sja1000.txerr;
+		u8 rxerr = msg->msg.error.cc.regs.sja1000.rxerr;
+
+		/* bus error interrupt */
+		dev->can.can_stats.bus_error++;
+		stats->rx_errors++;
+
+		cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
+
+		switch (ecc & SJA1000_ECC_MASK) {
+		case SJA1000_ECC_BIT:
+			cf->data[2] |= CAN_ERR_PROT_BIT;
+			break;
+		case SJA1000_ECC_FORM:
+			cf->data[2] |= CAN_ERR_PROT_FORM;
+			break;
+		case SJA1000_ECC_STUFF:
+			cf->data[2] |= CAN_ERR_PROT_STUFF;
+			break;
+		default:
+			cf->data[2] |= CAN_ERR_PROT_UNSPEC;
+			cf->data[3] = ecc & SJA1000_ECC_SEG;
+			break;
+		}
+
+		/* Error occurred during transmission? */
+		if ((ecc & SJA1000_ECC_DIR) == 0)
+			cf->data[2] |= CAN_ERR_PROT_TX;
+
+		if (dev->can.state == CAN_STATE_ERROR_WARNING ||
+		    dev->can.state == CAN_STATE_ERROR_PASSIVE) {
+			cf->data[1] = (txerr > rxerr) ?
+			    CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE;
+		}
+	} else if (msg->type == CPC_MSG_TYPE_OVERRUN) {
+		cf->can_id |= CAN_ERR_CRTL;
+		cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+
+		stats->rx_over_errors++;
+		stats->rx_errors++;
+	}
+
+	/*
+	 * Update the statistics before netif_rx(): once the skb has been
+	 * handed over it may be freed, so cf must not be touched afterwards.
+	 */
+	stats->rx_packets++;
+	stats->rx_bytes += cf->can_dlc;
+
+	netif_rx(skb);
+}
+
+/*
+ * callback for bulk IN urb
+ *
+ * The transfer buffer starts with a CPC_HEADER_SIZE byte header whose first
+ * byte holds the number of messages in its low seven bits. The messages
+ * follow back to back, each CPC_MSG_HEADER_LEN + msg->length bytes long.
+ * Every message is dispatched by type, then the URB is resubmitted.
+ */
+static void ems_usb_read_bulk_callback(struct urb *urb)
+{
+	struct ems_usb *dev = urb->context;
+	struct net_device *netdev;
+	int retval;
+
+	netdev = dev->netdev;
+
+	if (!netif_device_present(netdev))
+		return;
+
+	switch (urb->status) {
+	case 0: /* success */
+		break;
+
+	case -ENOENT:
+		return;
+
+	default:
+		dev_info(netdev->dev.parent, "Rx URB aborted (%d)\n",
+			 urb->status);
+		goto resubmit_urb;
+	}
+
+	if (urb->actual_length > CPC_HEADER_SIZE) {
+		struct ems_cpc_msg *msg;
+		u8 *ibuf = urb->transfer_buffer;
+		/* bit 7 of the count byte is masked off and not evaluated */
+		u8 msg_count = ibuf[0] & ~0x80;
+		/*
+		 * Wider than u8: header length plus msg->length can exceed
+		 * 255, and a wrapped offset would defeat the bounds checks.
+		 */
+		unsigned int start = CPC_HEADER_SIZE;
+
+		while (msg_count) {
+			/* the message header must lie inside the buffer */
+			if (start + CPC_MSG_HEADER_LEN >
+			    urb->transfer_buffer_length) {
+				dev_err(netdev->dev.parent, "format error\n");
+				break;
+			}
+
+			msg = (struct ems_cpc_msg *)&ibuf[start];
+
+			switch (msg->type) {
+			case CPC_MSG_TYPE_CAN_STATE:
+				/* Process CAN state changes */
+				ems_usb_rx_err(dev, msg);
+				break;
+
+			case CPC_MSG_TYPE_CAN_FRAME:
+			case CPC_MSG_TYPE_EXT_CAN_FRAME:
+			case CPC_MSG_TYPE_RTR_FRAME:
+			case CPC_MSG_TYPE_EXT_RTR_FRAME:
+				ems_usb_rx_can_msg(dev, msg);
+				break;
+
+			case CPC_MSG_TYPE_CAN_FRAME_ERROR:
+				/* Process errorframe */
+				ems_usb_rx_err(dev, msg);
+				break;
+
+			case CPC_MSG_TYPE_OVERRUN:
+				/* Message lost while receiving */
+				ems_usb_rx_err(dev, msg);
+				break;
+			}
+
+			start += CPC_MSG_HEADER_LEN + msg->length;
+			msg_count--;
+
+			if (start > urb->transfer_buffer_length) {
+				dev_err(netdev->dev.parent, "format error\n");
+				break;
+			}
+		}
+	}
+
+resubmit_urb:
+	usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2),
+			  urb->transfer_buffer, RX_BUFFER_SIZE,
+			  ems_usb_read_bulk_callback, dev);
+
+	retval = usb_submit_urb(urb, GFP_ATOMIC);
+
+	if (retval == -ENODEV)
+		netif_device_detach(netdev);
+	else if (retval)
+		dev_err(netdev->dev.parent,
+			"failed resubmitting read bulk urb: %d\n", retval);
+
+	return;
+}
+
+/*
+ * Completion callback for a transmit (write) bulk urb.
+ *
+ * Frees the urb's transfer buffer, drops the active TX urb count, updates
+ * the TX statistics, loops the echo skb back, releases the TX context and
+ * wakes the queue if it was stopped.
+ */
+static void ems_usb_write_bulk_callback(struct urb *urb)
+{
+ struct ems_tx_urb_context *context = urb->context;
+ struct ems_usb *dev;
+ struct net_device *netdev;
+
+ BUG_ON(!context);
+
+ dev = context->dev;
+ netdev = dev->netdev;
+
+ /* free up our allocated buffer */
+ usb_buffer_free(urb->dev, urb->transfer_buffer_length,
+ urb->transfer_buffer, urb->transfer_dma);
+
+ atomic_dec(&dev->active_tx_urbs);
+
+ if (!netif_device_present(netdev))
+ return;
+
+ /* A failed urb is only logged; the statistics below are still updated */
+ if (urb->status)
+ dev_info(netdev->dev.parent, "Tx URB aborted (%d)\n",
+ urb->status);
+
+ netdev->trans_start = jiffies;
+
+ /* transmission complete interrupt */
+ netdev->stats.tx_packets++;
+ netdev->stats.tx_bytes += context->dlc;
+
+ can_get_echo_skb(netdev, context->echo_index);
+
+ /* Release context */
+ context->echo_index = MAX_TX_URBS;
+
+ if (netif_queue_stopped(netdev))
+ netif_wake_queue(netdev);
+}
+
+/*
+ * Send the given CPC command synchronously.
+ *
+ * The command is staged in dev->tx_msg_buffer behind a zeroed
+ * CPC_HEADER_SIZE byte header and written to bulk endpoint 2 with a
+ * timeout value of 1000. Returns the result of usb_bulk_msg().
+ */
+static int ems_usb_command_msg(struct ems_usb *dev, struct ems_cpc_msg *msg)
+{
+	u8 *out = dev->tx_msg_buffer;
+	int msg_len = msg->length + CPC_MSG_HEADER_LEN;
+	int transferred;
+
+	/* zeroed transport header, followed by the message itself */
+	memset(out, 0, CPC_HEADER_SIZE);
+	memcpy(out + CPC_HEADER_SIZE, msg, msg_len);
+
+	return usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 2),
+			    out, msg_len + CPC_HEADER_SIZE,
+			    &transferred, 1000);
+}
+
+/*
+ * Change the CAN controller's mode register: store the new mode in the
+ * cached controller parameters and send the whole parameter message to
+ * the device.
+ */
+static int ems_usb_write_mode(struct ems_usb *dev, u8 mode)
+{
+	struct ems_cpc_msg *params = &dev->active_params;
+
+	params->msg.can_params.cc_params.sja1000.mode = mode;
+
+	return ems_usb_command_msg(dev, params);
+}
+
+/*
+ * Send a CPC_Control command to change behaviour when interface receives a CAN
+ * message, bus error or CAN state changed notifications.
+ *
+ * @dev: interface to configure
+ * @val: subject selection (CONTR_CAN_MESSAGE, CONTR_CAN_STATE or
+ *       CONTR_BUS_ERROR) or'ed with an action (CONTR_CONT_OFF,
+ *       CONTR_CONT_ON or CONTR_ONCE)
+ *
+ * Returns the result of ems_usb_command_msg().
+ */
+static int ems_usb_control_cmd(struct ems_usb *dev, u8 val)
+{
+	struct ems_cpc_msg cmd;
+
+	/*
+	 * ems_usb_command_msg() copies CPC_MSG_HEADER_LEN + cmd.length bytes
+	 * of this on-stack message to the device. Clear it first so that the
+	 * timestamp fields and the payload bytes after generic[0] are
+	 * defined instead of leaking stack contents.
+	 */
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.type = CPC_CMD_TYPE_CONTROL;
+	/*
+	 * NOTE(review): 'length' is documented as the length of the data in
+	 * the union, yet the header length is added here as well - confirm
+	 * against the CPC protocol before changing it.
+	 */
+	cmd.length = CPC_MSG_HEADER_LEN + 1;
+
+	cmd.msgid = 0;
+
+	cmd.msg.generic[0] = val;
+
+	return ems_usb_command_msg(dev, &cmd);
+}
+
+/*
+ * Start interface
+ *
+ * Submits up to MAX_RX_URBS bulk receive URBs and the interrupt URB, tells
+ * the device to forward CAN messages, state changes and bus errors, and
+ * finally switches the controller to normal mode.
+ *
+ * Returns 0 on success or a negative error code. If only some of the
+ * receive URBs could be set up, the interface is still started and a
+ * warning is logged.
+ */
+static int ems_usb_start(struct ems_usb *dev)
+{
+	struct net_device *netdev = dev->netdev;
+	int err, i;
+
+	dev->intr_in_buffer[0] = 0;
+	dev->free_slots = 15; /* initial size */
+
+	for (i = 0; i < MAX_RX_URBS; i++) {
+		struct urb *urb = NULL;
+		u8 *buf = NULL;
+
+		/* create a URB, and a buffer for it */
+		urb = usb_alloc_urb(0, GFP_KERNEL);
+		if (!urb) {
+			dev_err(netdev->dev.parent,
+				"No memory left for URBs\n");
+			/*
+			 * Leave the loop instead of returning so that URBs
+			 * submitted in earlier iterations are handled the
+			 * same way as on a submit failure.
+			 */
+			err = -ENOMEM;
+			break;
+		}
+
+		buf = usb_buffer_alloc(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL,
+				       &urb->transfer_dma);
+		if (!buf) {
+			dev_err(netdev->dev.parent,
+				"No memory left for USB buffer\n");
+			usb_free_urb(urb);
+			err = -ENOMEM;
+			break;
+		}
+
+		usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2),
+				  buf, RX_BUFFER_SIZE,
+				  ems_usb_read_bulk_callback, dev);
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+		usb_anchor_urb(urb, &dev->rx_submitted);
+
+		err = usb_submit_urb(urb, GFP_KERNEL);
+		if (err) {
+			if (err == -ENODEV)
+				netif_device_detach(dev->netdev);
+
+			usb_unanchor_urb(urb);
+			usb_buffer_free(dev->udev, RX_BUFFER_SIZE, buf,
+					urb->transfer_dma);
+			/* the URB was never handed to the core: free it */
+			usb_free_urb(urb);
+			break;
+		}
+
+		/* Drop reference, USB core will take care of freeing it */
+		usb_free_urb(urb);
+	}
+
+	/* Did we submit any URBs */
+	if (i == 0) {
+		dev_warn(netdev->dev.parent, "couldn't setup read URBs\n");
+		return err;
+	}
+
+	/* Warn if we couldn't submit all the URBs */
+	if (i < MAX_RX_URBS)
+		dev_warn(netdev->dev.parent, "rx performance may be slow\n");
+
+	/* Setup and start interrupt URB */
+	usb_fill_int_urb(dev->intr_urb, dev->udev,
+			 usb_rcvintpipe(dev->udev, 1),
+			 dev->intr_in_buffer,
+			 INTR_IN_BUFFER_SIZE,
+			 ems_usb_read_interrupt_callback, dev, 1);
+
+	err = usb_submit_urb(dev->intr_urb, GFP_KERNEL);
+	if (err) {
+		if (err == -ENODEV)
+			netif_device_detach(dev->netdev);
+
+		dev_warn(netdev->dev.parent, "intr URB submit failed: %d\n",
+			 err);
+
+		return err;
+	}
+
+	/* CPC-USB will transfer received message to host */
+	err = ems_usb_control_cmd(dev, CONTR_CAN_MESSAGE | CONTR_CONT_ON);
+	if (err)
+		goto failed;
+
+	/* CPC-USB will transfer CAN state changes to host */
+	err = ems_usb_control_cmd(dev, CONTR_CAN_STATE | CONTR_CONT_ON);
+	if (err)
+		goto failed;
+
+	/* CPC-USB will transfer bus errors to host */
+	err = ems_usb_control_cmd(dev, CONTR_BUS_ERROR | CONTR_CONT_ON);
+	if (err)
+		goto failed;
+
+	err = ems_usb_write_mode(dev, SJA1000_MOD_NORMAL);
+	if (err)
+		goto failed;
+
+	dev->can.state = CAN_STATE_ERROR_ACTIVE;
+
+	return 0;
+
+failed:
+	if (err == -ENODEV)
+		netif_device_detach(dev->netdev);
+
+	dev_warn(netdev->dev.parent, "couldn't submit control: %d\n", err);
+
+	return err;
+}
+
+/*
+ * Cancel all outstanding URBs of the device: the interrupt URB is unlinked,
+ * the anchored RX and TX URBs are killed.  Afterwards the active TX counter
+ * is reset and every TX context is marked free (echo_index == MAX_TX_URBS).
+ */
+static void unlink_all_urbs(struct ems_usb *dev)
+{
+	int slot = 0;
+
+	usb_unlink_urb(dev->intr_urb);
+
+	usb_kill_anchored_urbs(&dev->rx_submitted);
+
+	usb_kill_anchored_urbs(&dev->tx_submitted);
+	atomic_set(&dev->active_tx_urbs, 0);
+
+	/* MAX_TX_URBS is the "context unused" marker */
+	while (slot < MAX_TX_URBS) {
+		dev->tx_contexts[slot].echo_index = MAX_TX_URBS;
+		slot++;
+	}
+}
+
+/*
+ * ndo_open handler: put the controller into reset mode, run the common CAN
+ * open path and start the device.  On success the open time is recorded and
+ * the TX queue is started; on failure the error of the failing step is
+ * returned.
+ */
+static int ems_usb_open(struct net_device *netdev)
+{
+	struct ems_usb *dev = netdev_priv(netdev);
+	int ret;
+
+	ret = ems_usb_write_mode(dev, SJA1000_MOD_RM);
+	if (ret)
+		return ret;
+
+	/* common open */
+	ret = open_candev(netdev);
+	if (ret)
+		return ret;
+
+	/* finally start device */
+	ret = ems_usb_start(dev);
+	if (!ret) {
+		dev->open_time = jiffies;
+		netif_start_queue(netdev);
+
+		return 0;
+	}
+
+	/* start failed: undo the common open and report the error */
+	if (ret == -ENODEV)
+		netif_device_detach(dev->netdev);
+
+	dev_warn(netdev->dev.parent, "couldn't start device: %d\n", ret);
+
+	close_candev(netdev);
+
+	return ret;
+}
+
+/*
+ * ndo_start_xmit handler: wrap one CAN frame into a CPC message and submit
+ * it in a bulk URB on OUT pipe 2.
+ *
+ * Returns NETDEV_TX_BUSY, leaving the skb untouched, when no TX context is
+ * free.  All other paths return NETDEV_TX_OK; the skb is then either owned
+ * by the echo skb machinery or has been freed (allocation or submit
+ * failure, counted in tx_dropped unless the device is gone).
+ */
+static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct ems_usb *dev = netdev_priv(netdev);
+	struct ems_tx_urb_context *context = NULL;
+	struct net_device_stats *stats = &netdev->stats;
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	struct ems_cpc_msg *msg;
+	struct urb *urb;
+	u8 *buf;
+	int i, err;
+	size_t size = CPC_HEADER_SIZE + CPC_MSG_HEADER_LEN
+			+ sizeof(struct cpc_can_msg);
+
+	/* create a URB, and a buffer for it, and copy the data to the URB */
+	urb = usb_alloc_urb(0, GFP_ATOMIC);
+	if (!urb) {
+		dev_err(netdev->dev.parent, "No memory left for URBs\n");
+		goto nomem;
+	}
+
+	buf = usb_buffer_alloc(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma);
+	if (!buf) {
+		dev_err(netdev->dev.parent, "No memory left for USB buffer\n");
+		usb_free_urb(urb);
+		goto nomem;
+	}
+
+	/* the CPC message starts behind the CPC header inside the buffer */
+	msg = (struct ems_cpc_msg *)&buf[CPC_HEADER_SIZE];
+
+	msg->msg.can_msg.id = cf->can_id & CAN_ERR_MASK;
+	msg->msg.can_msg.length = cf->can_dlc;
+
+	if (cf->can_id & CAN_RTR_FLAG) {
+		/* remote frames carry no payload */
+		msg->type = cf->can_id & CAN_EFF_FLAG ?
+			CPC_CMD_TYPE_EXT_RTR_FRAME : CPC_CMD_TYPE_RTR_FRAME;
+
+		msg->length = CPC_CAN_MSG_MIN_SIZE;
+	} else {
+		msg->type = cf->can_id & CAN_EFF_FLAG ?
+			CPC_CMD_TYPE_EXT_CAN_FRAME : CPC_CMD_TYPE_CAN_FRAME;
+
+		for (i = 0; i < cf->can_dlc; i++)
+			msg->msg.can_msg.msg[i] = cf->data[i];
+
+		msg->length = CPC_CAN_MSG_MIN_SIZE + cf->can_dlc;
+	}
+
+	/* look for a free TX context; i doubles as the echo index below */
+	for (i = 0; i < MAX_TX_URBS; i++) {
+		if (dev->tx_contexts[i].echo_index == MAX_TX_URBS) {
+			context = &dev->tx_contexts[i];
+			break;
+		}
+	}
+
+	/*
+	 * Should never happen: it would mean that more URBs are in flight
+	 * than allowed (MAX_TX_URBS).
+	 */
+	if (!context) {
+		/*
+		 * The URB has not been anchored yet, so there is nothing to
+		 * unanchor.  Release the buffer and our URB reference.
+		 */
+		usb_buffer_free(dev->udev, size, buf, urb->transfer_dma);
+		usb_free_urb(urb);
+
+		dev_warn(netdev->dev.parent, "couldn't find free context\n");
+
+		return NETDEV_TX_BUSY;
+	}
+
+	context->dev = dev;
+	context->echo_index = i;
+	context->dlc = cf->can_dlc;
+
+	usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), buf,
+			  size, ems_usb_write_bulk_callback, context);
+	urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	usb_anchor_urb(urb, &dev->tx_submitted);
+
+	can_put_echo_skb(skb, netdev, context->echo_index);
+
+	atomic_inc(&dev->active_tx_urbs);
+
+	err = usb_submit_urb(urb, GFP_ATOMIC);
+	if (unlikely(err)) {
+		/*
+		 * The skb was handed to the echo machinery above, so
+		 * can_free_echo_skb() is what releases it.  It must not be
+		 * freed a second time here.
+		 */
+		can_free_echo_skb(netdev, context->echo_index);
+
+		/* the URB was never submitted: hand the context back */
+		context->echo_index = MAX_TX_URBS;
+
+		usb_unanchor_urb(urb);
+		usb_buffer_free(dev->udev, size, buf, urb->transfer_dma);
+
+		atomic_dec(&dev->active_tx_urbs);
+
+		if (err == -ENODEV) {
+			netif_device_detach(netdev);
+		} else {
+			dev_warn(netdev->dev.parent, "failed tx_urb %d\n", err);
+
+			stats->tx_dropped++;
+		}
+	} else {
+		netdev->trans_start = jiffies;
+
+		/* Slow down tx path */
+		if (atomic_read(&dev->active_tx_urbs) >= MAX_TX_URBS ||
+		    dev->free_slots < 5) {
+			netif_stop_queue(netdev);
+		}
+	}
+
+	/*
+	 * Release our reference to this URB, the USB core will eventually free
+	 * it entirely.
+	 */
+	usb_free_urb(urb);
+
+	return NETDEV_TX_OK;
+
+nomem:
+	/* skb was already dereferenced above, so it cannot be NULL here */
+	dev_kfree_skb(skb);
+
+	stats->tx_dropped++;
+
+	return NETDEV_TX_OK;
+}
+
+/*
+ * ndo_stop handler: cancel all URBs, stop the TX queue, put the CAN
+ * controller back into reset mode and run the common CAN close path.
+ * Always returns 0; a failing mode write is only logged.
+ */
+static int ems_usb_close(struct net_device *netdev)
+{
+	struct ems_usb *dev = netdev_priv(netdev);
+
+	/* Stop polling */
+	unlink_all_urbs(dev);
+
+	netif_stop_queue(netdev);
+
+	/* Set CAN controller to reset mode */
+	if (ems_usb_write_mode(dev, SJA1000_MOD_RM))
+		dev_warn(netdev->dev.parent, "couldn't stop device\n");
+
+	close_candev(netdev);
+
+	/* open_time == 0 marks the interface as closed for ems_usb_set_mode() */
+	dev->open_time = 0;
+
+	return 0;
+}
+
+static const struct net_device_ops ems_usb_netdev_ops = { /* netdev callbacks installed by ems_usb_probe() */
+ .ndo_open = ems_usb_open, /* reset mode, open_candev(), start device */
+ .ndo_stop = ems_usb_close, /* cancel URBs, reset mode, close_candev() */
+ .ndo_start_xmit = ems_usb_start_xmit, /* send one CAN frame in a bulk URB */
+};
+
+static struct can_bittiming_const ems_usb_bittiming_const = { /* limits match the BTR0/BTR1 field widths used in ems_usb_set_bittiming() */
+ .name = "ems_usb",
+ .tseg1_min = 1,
+ .tseg1_max = 16, /* presumably prop_seg + phase_seg1: stored minus one in 4 bits of BTR1 */
+ .tseg2_min = 1,
+ .tseg2_max = 8, /* presumably phase_seg2: stored minus one in 3 bits of BTR1 */
+ .sjw_max = 4, /* sjw is stored minus one in 2 bits of BTR0 */
+ .brp_min = 1,
+ .brp_max = 64, /* brp is stored minus one in 6 bits of BTR0 */
+ .brp_inc = 1,
+};
+
+/*
+ * do_set_mode callback of the CAN device.
+ *
+ * Only CAN_MODE_START is supported: the controller is switched to normal
+ * mode (a failure is only logged) and a stopped TX queue is woken up.
+ *
+ * Returns 0 on CAN_MODE_START, -EINVAL when the interface is not open and
+ * -EOPNOTSUPP for every other mode.
+ */
+static int ems_usb_set_mode(struct net_device *netdev, enum can_mode mode)
+{
+	struct ems_usb *dev = netdev_priv(netdev);
+
+	/* open_time is only non-zero between ems_usb_open() and ems_usb_close() */
+	if (!dev->open_time)
+		return -EINVAL;
+
+	switch (mode) {
+	case CAN_MODE_START:
+		if (ems_usb_write_mode(dev, SJA1000_MOD_NORMAL))
+			dev_warn(netdev->dev.parent, "couldn't start device\n");
+
+		if (netif_queue_stopped(netdev))
+			netif_wake_queue(netdev);
+		break;
+
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/*
+ * do_set_bittiming callback: encode the bit timing stored in dev->can into
+ * the BTR0/BTR1 values of the active parameter message and send that
+ * message to the device.  Returns the result of ems_usb_command_msg().
+ */
+static int ems_usb_set_bittiming(struct net_device *netdev)
+{
+	struct ems_usb *dev = netdev_priv(netdev);
+	struct can_bittiming *bt = &dev->can.bittiming;
+	struct cpc_sja1000_params *sja1000 =
+		&dev->active_params.msg.can_params.cc_params.sja1000;
+	u8 btr0, btr1;
+
+	/* BTR0: bits 0-5 hold brp - 1, bits 6-7 hold sjw - 1 */
+	btr0 = (bt->brp - 1) & 0x3f;
+	btr0 |= ((bt->sjw - 1) & 0x3) << 6;
+
+	/* BTR1: bits 0-3 hold prop_seg + phase_seg1 - 1, bits 4-6 phase_seg2 - 1 */
+	btr1 = (bt->prop_seg + bt->phase_seg1 - 1) & 0xf;
+	btr1 |= ((bt->phase_seg2 - 1) & 0x7) << 4;
+
+	/* bit 7 of BTR1 is set when triple sampling was requested */
+	if (dev->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+		btr1 |= 0x80;
+
+	dev_info(netdev->dev.parent, "setting BTR0=0x%02x BTR1=0x%02x\n",
+		 btr0, btr1);
+
+	sja1000->btr0 = btr0;
+	sja1000->btr1 = btr1;
+
+	return ems_usb_command_msg(dev, &dev->active_params);
+}
+
+/*
+ * Fill @msg with the initial CAN parameter message for an SJA1000
+ * controller: acceptance filter fully open, bit timing registers zeroed,
+ * default output control and controller mode SJA1000_MOD_RM.
+ */
+static void init_params_sja1000(struct ems_cpc_msg *msg)
+{
+	struct cpc_sja1000_params *regs =
+		&msg->msg.can_params.cc_params.sja1000;
+
+	/* message header */
+	msg->type = CPC_CMD_TYPE_CAN_PARAMS;
+	msg->length = sizeof(struct cpc_can_params);
+	msg->msgid = 0;
+
+	msg->msg.can_params.cc_type = CPC_CC_TYPE_SJA1000;
+
+	/* Acceptance filter open: code all zero ... */
+	regs->acc_code0 = 0x00;
+	regs->acc_code1 = 0x00;
+	regs->acc_code2 = 0x00;
+	regs->acc_code3 = 0x00;
+
+	/* ... and mask all ones */
+	regs->acc_mask0 = 0xFF;
+	regs->acc_mask1 = 0xFF;
+	regs->acc_mask2 = 0xFF;
+	regs->acc_mask3 = 0xFF;
+
+	/* the real values are written later by ems_usb_set_bittiming() */
+	regs->btr0 = 0;
+	regs->btr1 = 0;
+
+	regs->outp_contr = SJA1000_DEFAULT_OUTPUT_CONTROL;
+	regs->mode = SJA1000_MOD_RM;
+}
+
+/*
+ * probe function for new CPC-USB devices
+ *
+ * Allocates the CAN net device and its private data, the interrupt URB and
+ * the interrupt/TX message buffers, sends the initial SJA1000 parameters to
+ * the device and registers the CAN device.
+ *
+ * Returns 0 on success.  On failure everything allocated so far is released
+ * and a negative error code is returned (-ENOMEM for allocation failures,
+ * otherwise the error of the failing call).
+ */
+static int ems_usb_probe(struct usb_interface *intf,
+			 const struct usb_device_id *id)
+{
+	struct net_device *netdev;
+	struct ems_usb *dev;
+	int i, err = -ENOMEM;
+
+	netdev = alloc_candev(sizeof(struct ems_usb));
+	if (!netdev) {
+		/* netdev is NULL here, so log against the USB interface */
+		dev_err(&intf->dev, "Couldn't alloc candev\n");
+		return -ENOMEM;
+	}
+
+	dev = netdev_priv(netdev);
+
+	dev->udev = interface_to_usbdev(intf);
+	dev->netdev = netdev;
+
+	dev->can.state = CAN_STATE_STOPPED;
+	dev->can.clock.freq = EMS_USB_ARM7_CLOCK;
+	dev->can.bittiming_const = &ems_usb_bittiming_const;
+	dev->can.do_set_bittiming = ems_usb_set_bittiming;
+	dev->can.do_set_mode = ems_usb_set_mode;
+
+	netdev->flags |= IFF_ECHO; /* we support local echo */
+
+	netdev->netdev_ops = &ems_usb_netdev_ops;
+
+	init_usb_anchor(&dev->rx_submitted);
+
+	init_usb_anchor(&dev->tx_submitted);
+	atomic_set(&dev->active_tx_urbs, 0);
+
+	/* MAX_TX_URBS marks a TX context as unused */
+	for (i = 0; i < MAX_TX_URBS; i++)
+		dev->tx_contexts[i].echo_index = MAX_TX_URBS;
+
+	/*
+	 * The parent of netdev is only set by SET_NETDEV_DEV() further down,
+	 * so all messages in this function are logged against &intf->dev.
+	 */
+	dev->intr_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (!dev->intr_urb) {
+		dev_err(&intf->dev, "Couldn't alloc intr URB\n");
+		goto cleanup_candev;
+	}
+
+	dev->intr_in_buffer = kzalloc(INTR_IN_BUFFER_SIZE, GFP_KERNEL);
+	if (!dev->intr_in_buffer) {
+		dev_err(&intf->dev, "Couldn't alloc Intr buffer\n");
+		goto cleanup_intr_urb;
+	}
+
+	dev->tx_msg_buffer = kzalloc(CPC_HEADER_SIZE +
+				     sizeof(struct ems_cpc_msg), GFP_KERNEL);
+	if (!dev->tx_msg_buffer) {
+		dev_err(&intf->dev, "Couldn't alloc Tx buffer\n");
+		goto cleanup_intr_in_buffer;
+	}
+
+	usb_set_intfdata(intf, dev);
+
+	SET_NETDEV_DEV(netdev, &intf->dev);
+
+	init_params_sja1000(&dev->active_params);
+
+	err = ems_usb_command_msg(dev, &dev->active_params);
+	if (err) {
+		dev_err(&intf->dev,
+			"couldn't initialize controller: %d\n", err);
+		goto cleanup_tx_msg_buffer;
+	}
+
+	err = register_candev(netdev);
+	if (err) {
+		dev_err(&intf->dev,
+			"couldn't register CAN device: %d\n", err);
+		goto cleanup_tx_msg_buffer;
+	}
+
+	return 0;
+
+cleanup_tx_msg_buffer:
+	/* dev is freed below: don't leave a pointer to it on the interface */
+	usb_set_intfdata(intf, NULL);
+	kfree(dev->tx_msg_buffer);
+
+cleanup_intr_in_buffer:
+	kfree(dev->intr_in_buffer);
+
+cleanup_intr_urb:
+	usb_free_urb(dev->intr_urb);
+
+cleanup_candev:
+	free_candev(netdev);
+
+	return err;
+}
+
+/*
+ * called by the usb core when the device is removed from the system
+ *
+ * Unregisters the net device, cancels all URBs and releases everything
+ * ems_usb_probe() allocated.
+ */
+static void ems_usb_disconnect(struct usb_interface *intf)
+{
+	struct ems_usb *dev = usb_get_intfdata(intf);
+
+	usb_set_intfdata(intf, NULL);
+
+	if (dev) {
+		unregister_netdev(dev->netdev);
+
+		unlink_all_urbs(dev);
+
+		usb_free_urb(dev->intr_urb);
+
+		kfree(dev->intr_in_buffer);
+		kfree(dev->tx_msg_buffer);
+
+		/*
+		 * dev is the private area of the net device (netdev_priv()),
+		 * so the net device must be freed last, after every other
+		 * use of dev.
+		 */
+		free_candev(dev->netdev);
+	}
+}
+
+/* usb specific object needed to register this driver with the usb subsystem */
+static struct usb_driver ems_usb_driver = {
+ .name = "ems_usb",
+ .probe = ems_usb_probe, /* sets up the CAN net device for a new interface */
+ .disconnect = ems_usb_disconnect, /* tears it down again on removal */
+ .id_table = ems_usb_table, /* defined elsewhere in this file (not visible here) */
+};
+
+/*
+ * Module init: announce the driver and register it with the USB subsystem.
+ * Returns 0 on success or the error returned by usb_register().
+ */
+static int __init ems_usb_init(void)
+{
+	int rc;
+
+	printk(KERN_INFO "CPC-USB kernel driver loaded\n");
+
+	/* register this driver with the USB subsystem */
+	rc = usb_register(&ems_usb_driver);
+	if (!rc)
+		return 0;
+
+	err("usb_register failed. Error number %d\n", rc);
+
+	return rc;
+}
+
+static void __exit ems_usb_exit(void) /* module unload hook, registered through module_exit() */
+{
+ /* deregister this driver with the USB subsystem */
+ usb_deregister(&ems_usb_driver);
+}
+
+module_init(ems_usb_init);
+module_exit(ems_usb_exit);
diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index d45eacb7670..211c8e9182f 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -85,8 +85,6 @@ static int cnic_uio_open(struct uio_info *uinfo, struct inode *inode)
cp->uio_dev = iminor(inode);
- cnic_shutdown_bnx2_rx_ring(dev);
-
cnic_init_bnx2_tx_ring(dev);
cnic_init_bnx2_rx_ring(dev);
@@ -98,6 +96,8 @@ static int cnic_uio_close(struct uio_info *uinfo, struct inode *inode)
struct cnic_dev *dev = uinfo->priv;
struct cnic_local *cp = dev->cnic_priv;
+ cnic_shutdown_bnx2_rx_ring(dev);
+
cp->uio_dev = -1;
return 0;
}
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index 3e3fab8afb1..61f9da2b494 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -1109,7 +1109,7 @@ static int external_switch;
static int __devinit cpmac_probe(struct platform_device *pdev)
{
int rc, phy_id;
- char mdio_bus_id[BUS_ID_SIZE];
+ char mdio_bus_id[MII_BUS_ID_SIZE];
struct resource *mem;
struct cpmac_priv *priv;
struct net_device *dev;
@@ -1118,7 +1118,7 @@ static int __devinit cpmac_probe(struct platform_device *pdev)
pdata = pdev->dev.platform_data;
if (external_switch || dumb_switch) {
- strncpy(mdio_bus_id, "0", BUS_ID_SIZE); /* fixed phys bus */
+ strncpy(mdio_bus_id, "0", MII_BUS_ID_SIZE); /* fixed phys bus */
phy_id = pdev->id;
} else {
for (phy_id = 0; phy_id < PHY_MAX_ADDR; phy_id++) {
@@ -1126,7 +1126,7 @@ static int __devinit cpmac_probe(struct platform_device *pdev)
continue;
if (!cpmac_mii->phy_map[phy_id])
continue;
- strncpy(mdio_bus_id, cpmac_mii->id, BUS_ID_SIZE);
+ strncpy(mdio_bus_id, cpmac_mii->id, MII_BUS_ID_SIZE);
break;
}
}
@@ -1167,7 +1167,7 @@ static int __devinit cpmac_probe(struct platform_device *pdev)
priv->msg_enable = netif_msg_init(debug_level, 0xff);
memcpy(dev->dev_addr, pdata->dev_addr, sizeof(dev->dev_addr));
- snprintf(priv->phy_name, BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id);
+ snprintf(priv->phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id);
priv->phy = phy_connect(dev, priv->phy_name, &cpmac_adjust_link, 0,
PHY_INTERFACE_MODE_MII);
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 977c3d35827..41bd7aeafd8 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -3083,7 +3083,6 @@ static const struct net_device_ops ehea_netdev_ops = {
.ndo_poll_controller = ehea_netpoll,
#endif
.ndo_get_stats = ehea_get_stats,
- .ndo_change_mtu = eth_change_mtu,
.ndo_set_mac_address = ehea_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_multicast_list = ehea_set_multicast_list,
diff --git a/drivers/net/igb/e1000_mac.c b/drivers/net/igb/e1000_mac.c
index a0231cd079f..7d76bb085e1 100644
--- a/drivers/net/igb/e1000_mac.c
+++ b/drivers/net/igb/e1000_mac.c
@@ -286,41 +286,6 @@ void igb_mta_set(struct e1000_hw *hw, u32 hash_value)
}
/**
- * igb_update_mc_addr_list - Update Multicast addresses
- * @hw: pointer to the HW structure
- * @mc_addr_list: array of multicast addresses to program
- * @mc_addr_count: number of multicast addresses to program
- *
- * Updates entire Multicast Table Array.
- * The caller must have a packed mc_addr_list of multicast addresses.
- **/
-void igb_update_mc_addr_list(struct e1000_hw *hw,
- u8 *mc_addr_list, u32 mc_addr_count)
-{
- u32 hash_value, hash_bit, hash_reg;
- int i;
-
- /* clear mta_shadow */
- memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
-
- /* update mta_shadow from mc_addr_list */
- for (i = 0; (u32) i < mc_addr_count; i++) {
- hash_value = igb_hash_mc_addr(hw, mc_addr_list);
-
- hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
- hash_bit = hash_value & 0x1F;
-
- hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit);
- mc_addr_list += (ETH_ALEN);
- }
-
- /* replace the entire MTA table */
- for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
- array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
- wrfl();
-}
-
-/**
* igb_hash_mc_addr - Generate a multicast hash value
* @hw: pointer to the HW structure
* @mc_addr: pointer to a multicast address
@@ -329,7 +294,7 @@ void igb_update_mc_addr_list(struct e1000_hw *hw,
* the multicast filter table array address and new table value. See
* igb_mta_set()
**/
-u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
+static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
{
u32 hash_value, hash_mask;
u8 bit_shift = 0;
@@ -392,6 +357,41 @@ u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
}
 /**
+ * igb_update_mc_addr_list - Update Multicast addresses
+ * @hw: pointer to the HW structure
+ * @mc_addr_list: array of multicast addresses to program
+ * @mc_addr_count: number of multicast addresses to program
+ *
+ * Updates entire Multicast Table Array.
+ * The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void igb_update_mc_addr_list(struct e1000_hw *hw,
+ u8 *mc_addr_list, u32 mc_addr_count)
+{
+ u32 hash_value, hash_bit, hash_reg;
+ int i;
+
+ /* start from an all-zero shadow copy of the table */
+ memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+
+ /* set one hash bit in mta_shadow per address in mc_addr_list */
+ for (i = 0; (u32) i < mc_addr_count; i++) {
+ hash_value = igb_hash_mc_addr(hw, mc_addr_list);
+
+ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); /* word index; mask assumes mta_reg_count is a power of two - TODO confirm */
+ hash_bit = hash_value & 0x1F; /* bit position inside that 32-bit word */
+
+ hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit);
+ mc_addr_list += (ETH_ALEN); /* addresses are packed back to back */
+ }
+
+ /* write every shadow word to E1000_MTA, highest index first */
+ for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+ array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
+ wrfl();
+}
+
+/**
* igb_clear_hw_cntrs_base - Clear base hardware counters
* @hw: pointer to the HW structure
*
diff --git a/drivers/net/igb/e1000_mac.h b/drivers/net/igb/e1000_mac.h
index 7518af8cbbf..bca17d88241 100644
--- a/drivers/net/igb/e1000_mac.h
+++ b/drivers/net/igb/e1000_mac.h
@@ -88,6 +88,5 @@ enum e1000_mng_mode {
#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2
extern void e1000_init_function_pointers_82575(struct e1000_hw *hw);
-extern u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr);
#endif
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index dd688d45e9c..385be601666 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -267,7 +267,8 @@ struct ixgbe_adapter {
enum ixgbe_fc_mode last_lfc_mode;
/* Interrupt Throttle Rate */
- u32 itr_setting;
+ u32 rx_itr_setting;
+ u32 tx_itr_setting;
u16 eitr_low;
u16 eitr_high;
@@ -351,7 +352,8 @@ struct ixgbe_adapter {
struct ixgbe_hw_stats stats;
/* Interrupt Throttle Rate */
- u32 eitr_param;
+ u32 rx_eitr_param;
+ u32 tx_eitr_param;
unsigned long state;
u64 tx_busy;
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 026e94a9984..53b0a668025 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -1929,7 +1929,7 @@ static int ixgbe_get_coalesce(struct net_device *netdev,
ec->tx_max_coalesced_frames_irq = adapter->tx_ring[0].work_limit;
/* only valid if in constant ITR mode */
- switch (adapter->itr_setting) {
+ switch (adapter->rx_itr_setting) {
case 0:
/* throttling disabled */
ec->rx_coalesce_usecs = 0;
@@ -1940,9 +1940,25 @@ static int ixgbe_get_coalesce(struct net_device *netdev,
break;
default:
/* fixed interrupt rate mode */
- ec->rx_coalesce_usecs = 1000000/adapter->eitr_param;
+ ec->rx_coalesce_usecs = 1000000/adapter->rx_eitr_param;
break;
}
+
+ /* only valid if in constant ITR mode */
+ switch (adapter->tx_itr_setting) {
+ case 0:
+ /* throttling disabled */
+ ec->tx_coalesce_usecs = 0;
+ break;
+ case 1:
+ /* dynamic ITR mode */
+ ec->tx_coalesce_usecs = 1;
+ break;
+ default:
+ ec->tx_coalesce_usecs = 1000000/adapter->tx_eitr_param;
+ break;
+ }
+
return 0;
}
@@ -1953,6 +1969,14 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
struct ixgbe_q_vector *q_vector;
int i;
+ /*
+ * don't accept tx specific changes if we've got mixed RxTx vectors
+ * test and jump out here if needed before changing the rx numbers
+ */
+ if ((1000000/ec->tx_coalesce_usecs) != adapter->tx_eitr_param &&
+ adapter->q_vector[0]->txr_count && adapter->q_vector[0]->rxr_count)
+ return -EINVAL;
+
if (ec->tx_max_coalesced_frames_irq)
adapter->tx_ring[0].work_limit = ec->tx_max_coalesced_frames_irq;
@@ -1963,26 +1987,49 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
return -EINVAL;
/* store the value in ints/second */
- adapter->eitr_param = 1000000/ec->rx_coalesce_usecs;
+ adapter->rx_eitr_param = 1000000/ec->rx_coalesce_usecs;
/* static value of interrupt rate */
- adapter->itr_setting = adapter->eitr_param;
+ adapter->rx_itr_setting = adapter->rx_eitr_param;
/* clear the lower bit as its used for dynamic state */
- adapter->itr_setting &= ~1;
+ adapter->rx_itr_setting &= ~1;
} else if (ec->rx_coalesce_usecs == 1) {
/* 1 means dynamic mode */
- adapter->eitr_param = 20000;
- adapter->itr_setting = 1;
+ adapter->rx_eitr_param = 20000;
+ adapter->rx_itr_setting = 1;
} else {
/*
* any other value means disable eitr, which is best
* served by setting the interrupt rate very high
*/
if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
- adapter->eitr_param = IXGBE_MAX_RSC_INT_RATE;
+ adapter->rx_eitr_param = IXGBE_MAX_RSC_INT_RATE;
else
- adapter->eitr_param = IXGBE_MAX_INT_RATE;
- adapter->itr_setting = 0;
+ adapter->rx_eitr_param = IXGBE_MAX_INT_RATE;
+ adapter->rx_itr_setting = 0;
+ }
+
+ if (ec->tx_coalesce_usecs > 1) {
+ /* check the limits */
+ if ((1000000/ec->tx_coalesce_usecs > IXGBE_MAX_INT_RATE) ||
+ (1000000/ec->tx_coalesce_usecs < IXGBE_MIN_INT_RATE))
+ return -EINVAL;
+
+ /* store the value in ints/second */
+ adapter->tx_eitr_param = 1000000/ec->tx_coalesce_usecs;
+
+ /* static value of interrupt rate */
+ adapter->tx_itr_setting = adapter->tx_eitr_param;
+
+ /* clear the lower bit as its used for dynamic state */
+ adapter->tx_itr_setting &= ~1;
+ } else if (ec->tx_coalesce_usecs == 1) {
+ /* 1 means dynamic mode */
+ adapter->tx_eitr_param = 10000;
+ adapter->tx_itr_setting = 1;
+ } else {
+ adapter->tx_eitr_param = IXGBE_MAX_INT_RATE;
+ adapter->tx_itr_setting = 0;
}
/* MSI/MSIx Interrupt Mode */
@@ -1992,17 +2039,17 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
for (i = 0; i < num_vectors; i++) {
q_vector = adapter->q_vector[i];
if (q_vector->txr_count && !q_vector->rxr_count)
- /* tx vector gets half the rate */
- q_vector->eitr = (adapter->eitr_param >> 1);
+ /* tx only */
+ q_vector->eitr = adapter->tx_eitr_param;
else
/* rx only or mixed */
- q_vector->eitr = adapter->eitr_param;
+ q_vector->eitr = adapter->rx_eitr_param;
ixgbe_write_eitr(q_vector);
}
/* Legacy Interrupt Mode */
} else {
q_vector = adapter->q_vector[0];
- q_vector->eitr = adapter->eitr_param;
+ q_vector->eitr = adapter->rx_eitr_param;
ixgbe_write_eitr(q_vector);
}
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 59ad9590e70..c407bd9de0d 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -926,12 +926,12 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
r_idx + 1);
}
- /* if this is a tx only vector halve the interrupt rate */
if (q_vector->txr_count && !q_vector->rxr_count)
- q_vector->eitr = (adapter->eitr_param >> 1);
+ /* tx only */
+ q_vector->eitr = adapter->tx_eitr_param;
else if (q_vector->rxr_count)
- /* rx only */
- q_vector->eitr = adapter->eitr_param;
+ /* rx or mixed */
+ q_vector->eitr = adapter->rx_eitr_param;
ixgbe_write_eitr(q_vector);
}
@@ -1359,7 +1359,7 @@ static int ixgbe_clean_rxonly(struct napi_struct *napi, int budget)
/* If all Rx work done, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->rx_itr_setting & 1)
ixgbe_set_itr_msix(q_vector);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter,
@@ -1420,7 +1420,7 @@ static int ixgbe_clean_rxtx_many(struct napi_struct *napi, int budget)
/* If all Rx work done, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->rx_itr_setting & 1)
ixgbe_set_itr_msix(q_vector);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter,
@@ -1458,10 +1458,10 @@ static int ixgbe_clean_txonly(struct napi_struct *napi, int budget)
if (!ixgbe_clean_tx_irq(q_vector, tx_ring))
work_done = budget;
- /* If all Rx work done, exit the polling mode */
+ /* If all Tx work done, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->tx_itr_setting & 1)
ixgbe_set_itr_msix(q_vector);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx));
@@ -1848,7 +1848,7 @@ static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter)
struct ixgbe_hw *hw = &adapter->hw;
IXGBE_WRITE_REG(hw, IXGBE_EITR(0),
- EITR_INTS_PER_SEC_TO_REG(adapter->eitr_param));
+ EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr_param));
ixgbe_set_ivar(adapter, 0, 0, 0);
ixgbe_set_ivar(adapter, 1, 0, 0);
@@ -1970,6 +1970,50 @@ static u32 ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
}
 /**
+ * ixgbe_configure_rscctl - enable RSC for the indicated ring
+ * @adapter: address of board private structure
+ * @index: index into adapter->rx_ring of the ring to configure
+ * @rx_buf_len: rx buffer length, selects the descriptor limit of non-PS rings
+ **/
+static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, int index,
+ int rx_buf_len)
+{
+ struct ixgbe_ring *rx_ring;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int j; /* register index of the ring (rx_ring->reg_idx) */
+ u32 rscctrl;
+
+ rx_ring = &adapter->rx_ring[index];
+ j = rx_ring->reg_idx;
+ rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(j)); /* read-modify-write of RSCCTL(j) */
+ rscctrl |= IXGBE_RSCCTL_RSCEN;
+ /*
+ * we must limit the number of descriptors so that the
+ * total size of max desc * buf_len is not greater
+ * than 65535
+ */
+ if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) { /* PS ring: limit chosen at build time from MAX_SKB_FRAGS */
+#if (MAX_SKB_FRAGS > 16)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
+#elif (MAX_SKB_FRAGS > 8)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
+#elif (MAX_SKB_FRAGS > 4)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
+#else
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
+#endif
+ } else { /* otherwise the limit shrinks as rx_buf_len grows */
+ if (rx_buf_len < IXGBE_RXBUFFER_4096)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
+ else if (rx_buf_len < IXGBE_RXBUFFER_8192)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
+ else
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(j), rscctrl);
+}
+
+/**
* ixgbe_configure_rx - Configure 8259x Receive Unit after Reset
* @adapter: board private structure
*
@@ -1990,7 +2034,6 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
u32 fctrl, hlreg0;
u32 reta = 0, mrqc = 0;
u32 rdrxctl;
- u32 rscctrl;
int rx_buf_len;
/* Decide whether to use packet split mode or not */
@@ -2148,36 +2191,9 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
/* Enable 82599 HW-RSC */
- for (i = 0; i < adapter->num_rx_queues; i++) {
- rx_ring = &adapter->rx_ring[i];
- j = rx_ring->reg_idx;
- rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(j));
- rscctrl |= IXGBE_RSCCTL_RSCEN;
- /*
- * we must limit the number of descriptors so that the
- * total size of max desc * buf_len is not greater
- * than 65535
- */
- if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) {
-#if (MAX_SKB_FRAGS > 16)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
-#elif (MAX_SKB_FRAGS > 8)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
-#elif (MAX_SKB_FRAGS > 4)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
-#else
- rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
-#endif
- } else {
- if (rx_buf_len < IXGBE_RXBUFFER_4096)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
- else if (rx_buf_len < IXGBE_RXBUFFER_8192)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
- else
- rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
- }
- IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(j), rscctrl);
- }
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ ixgbe_configure_rscctl(adapter, i, rx_buf_len);
+
/* Disable RSC for ACK packets */
IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
(IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
@@ -2926,6 +2942,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
ixgbe_napi_disable_all(adapter);
+ clear_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state);
+ del_timer_sync(&adapter->sfp_timer);
del_timer_sync(&adapter->watchdog_timer);
cancel_work_sync(&adapter->watchdog_task);
@@ -2989,7 +3007,7 @@ static int ixgbe_poll(struct napi_struct *napi, int budget)
/* If budget not fully consumed, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->rx_itr_setting & 1)
ixgbe_set_itr(adapter);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter, IXGBE_EIMS_RTX_QUEUE);
@@ -3599,7 +3617,10 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
if (!q_vector)
goto err_out;
q_vector->adapter = adapter;
- q_vector->eitr = adapter->eitr_param;
+ if (q_vector->txr_count && !q_vector->rxr_count)
+ q_vector->eitr = adapter->tx_eitr_param;
+ else
+ q_vector->eitr = adapter->rx_eitr_param;
q_vector->v_idx = q_idx;
netif_napi_add(adapter->netdev, &q_vector->napi, (*poll), 64);
adapter->q_vector[q_idx] = q_vector;
@@ -3868,8 +3889,10 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
hw->fc.disable_fc_autoneg = false;
/* enable itr by default in dynamic mode */
- adapter->itr_setting = 1;
- adapter->eitr_param = 20000;
+ adapter->rx_itr_setting = 1;
+ adapter->rx_eitr_param = 20000;
+ adapter->tx_itr_setting = 1;
+ adapter->tx_eitr_param = 10000;
/* set defaults for eitr in MegaBytes */
adapter->eitr_low = 10;
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index f7bdde111df..b5aa974827e 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -1469,6 +1469,7 @@ netxen_nic_resume(struct pci_dev *pdev)
}
netxen_schedule_work(adapter, netxen_fw_poll_work, FW_POLL_DELAY);
+ return 0;
err_out_detach:
netxen_nic_detach(adapter);
@@ -1903,12 +1904,13 @@ static void netxen_tx_timeout_task(struct work_struct *work)
netif_wake_queue(adapter->netdev);
- goto done;
+ clear_bit(__NX_RESETTING, &adapter->state);
} else {
+ clear_bit(__NX_RESETTING, &adapter->state);
if (!netxen_nic_reset_context(adapter)) {
adapter->netdev->trans_start = jiffies;
- goto done;
+ return;
}
/* context reset failed, fall through for fw reset */
@@ -1916,8 +1918,6 @@ static void netxen_tx_timeout_task(struct work_struct *work)
request_reset:
adapter->need_fw_reset = 1;
-done:
- clear_bit(__NX_RESETTING, &adapter->state);
}
struct net_device_stats *netxen_nic_get_stats(struct net_device *netdev)
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index 97db1c73234..474876c879c 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -340,12 +340,11 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link)
base = &virt[hw_info[i].offset & (req.Size-1)];
if ((readb(base+0) == hw_info[i].a0) &&
(readb(base+2) == hw_info[i].a1) &&
- (readb(base+4) == hw_info[i].a2))
- break;
- }
- if (i < NR_INFO) {
- for (j = 0; j < 6; j++)
- dev->dev_addr[j] = readb(base + (j<<1));
+ (readb(base+4) == hw_info[i].a2)) {
+ for (j = 0; j < 6; j++)
+ dev->dev_addr[j] = readb(base + (j<<1));
+ break;
+ }
}
iounmap(virt);
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index 07a7e4b8f8f..cc4b2f99989 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -884,13 +884,12 @@ static int efx_wanted_rx_queues(void)
int count;
int cpu;
- if (unlikely(!alloc_cpumask_var(&core_mask, GFP_KERNEL))) {
+ if (unlikely(!zalloc_cpumask_var(&core_mask, GFP_KERNEL))) {
printk(KERN_WARNING
"sfc: RSS disabled due to allocation failure\n");
return 1;
}
- cpumask_clear(core_mask);
count = 0;
for_each_online_cpu(cpu) {
if (!cpumask_test_cpu(cpu, core_mask)) {
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 15140f9f2e9..ef1165718dd 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1497,7 +1497,6 @@ static int sky2_up(struct net_device *dev)
if (ramsize > 0) {
u32 rxspace;
- hw->flags |= SKY2_HW_RAM_BUFFER;
pr_debug(PFX "%s: ram buffer %dK\n", dev->name, ramsize);
if (ramsize < 16)
rxspace = ramsize / 2;
@@ -2926,6 +2925,9 @@ static int __devinit sky2_init(struct sky2_hw *hw)
++hw->ports;
}
+ if (sky2_read8(hw, B2_E_0))
+ hw->flags |= SKY2_HW_RAM_BUFFER;
+
return 0;
}
diff --git a/drivers/net/sunvnet.c b/drivers/net/sunvnet.c
index f1e5e4542c2..bc74db0d12f 100644
--- a/drivers/net/sunvnet.c
+++ b/drivers/net/sunvnet.c
@@ -1016,7 +1016,6 @@ static const struct net_device_ops vnet_ops = {
.ndo_open = vnet_open,
.ndo_stop = vnet_close,
.ndo_set_multicast_list = vnet_set_rx_mode,
- .ndo_change_mtu = eth_change_mtu,
.ndo_set_mac_address = vnet_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_tx_timeout = vnet_tx_timeout,
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d3ee1994b02..4fdfa2ae541 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -946,8 +946,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
char *name;
unsigned long flags = 0;
- err = -EINVAL;
-
if (!capable(CAP_NET_ADMIN))
return -EPERM;
err = security_tun_dev_create();
@@ -964,7 +962,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
flags |= TUN_TAP_DEV;
name = "tap%d";
} else
- goto failed;
+ return -EINVAL;
if (*ifr->ifr_name)
name = ifr->ifr_name;
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index e2a39b9be96..e391ef969c2 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -263,6 +263,7 @@ static int kaweth_control(struct kaweth_device *kaweth,
int timeout)
{
struct usb_ctrlrequest *dr;
+ int retval;
dbg("kaweth_control()");
@@ -278,18 +279,21 @@ static int kaweth_control(struct kaweth_device *kaweth,
return -ENOMEM;
}
- dr->bRequestType= requesttype;
+ dr->bRequestType = requesttype;
dr->bRequest = request;
dr->wValue = cpu_to_le16(value);
dr->wIndex = cpu_to_le16(index);
dr->wLength = cpu_to_le16(size);
- return kaweth_internal_control_msg(kaweth->dev,
- pipe,
- dr,
- data,
- size,
- timeout);
+ retval = kaweth_internal_control_msg(kaweth->dev,
+ pipe,
+ dr,
+ data,
+ size,
+ timeout);
+
+ kfree(dr);
+ return retval;
}
/****************************************************************
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 938fb3530a7..c6c922247d0 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -1227,7 +1227,7 @@ static const struct driver_info smsc95xx_info = {
.rx_fixup = smsc95xx_rx_fixup,
.tx_fixup = smsc95xx_tx_fixup,
.status = smsc95xx_status,
- .flags = FLAG_ETHER,
+ .flags = FLAG_ETHER | FLAG_SEND_ZLP,
};
static const struct usb_device_id products[] = {
@@ -1237,10 +1237,75 @@ static const struct usb_device_id products[] = {
.driver_info = (unsigned long) &smsc95xx_info,
},
{
+ /* SMSC9505 USB Ethernet Device */
+ USB_DEVICE(0x0424, 0x9505),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9500A USB Ethernet Device */
+ USB_DEVICE(0x0424, 0x9E00),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9505A USB Ethernet Device */
+ USB_DEVICE(0x0424, 0x9E01),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
/* SMSC9512/9514 USB Hub & Ethernet Device */
USB_DEVICE(0x0424, 0xec00),
.driver_info = (unsigned long) &smsc95xx_info,
},
+ {
+ /* SMSC9500 USB Ethernet Device (SAL10) */
+ USB_DEVICE(0x0424, 0x9900),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9505 USB Ethernet Device (SAL10) */
+ USB_DEVICE(0x0424, 0x9901),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9500A USB Ethernet Device (SAL10) */
+ USB_DEVICE(0x0424, 0x9902),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9505A USB Ethernet Device (SAL10) */
+ USB_DEVICE(0x0424, 0x9903),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9512/9514 USB Hub & Ethernet Device (SAL10) */
+ USB_DEVICE(0x0424, 0x9904),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9500A USB Ethernet Device (HAL) */
+ USB_DEVICE(0x0424, 0x9905),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9505A USB Ethernet Device (HAL) */
+ USB_DEVICE(0x0424, 0x9906),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9500 USB Ethernet Device (Alternate ID) */
+ USB_DEVICE(0x0424, 0x9907),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9500A USB Ethernet Device (Alternate ID) */
+ USB_DEVICE(0x0424, 0x9908),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9512/9514 USB Hub & Ethernet Device (Alternate ID) */
+ USB_DEVICE(0x0424, 0x9909),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
{ }, /* END */
};
MODULE_DEVICE_TABLE(usb, products);
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 24b36f79515..ca5ca5ae061 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1049,7 +1049,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
* NOTE: strictly conforming cdc-ether devices should expect
* the ZLP here, but ignore the one-byte packet.
*/
- if ((length % dev->maxpacket) == 0) {
+ if (!(info->flags & FLAG_SEND_ZLP) && (length % dev->maxpacket) == 0) {
urb->transfer_buffer_length++;
if (skb_tailroom(skb)) {
skb->data[skb->len] = 0;
diff --git a/drivers/net/wireless/arlan-proc.c b/drivers/net/wireless/arlan-proc.c
index 2ab1d59870f..a8b689635a3 100644
--- a/drivers/net/wireless/arlan-proc.c
+++ b/drivers/net/wireless/arlan-proc.c
@@ -402,7 +402,7 @@ static int arlan_setup_card_by_book(struct net_device *dev)
static char arlan_drive_info[ARLAN_STR_SIZE] = "A655\n\0";
-static int arlan_sysctl_info(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int i;
@@ -629,7 +629,7 @@ final:
*lenp = pos;
if (!write)
- retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ retv = proc_dostring(ctl, write, buffer, lenp, ppos);
else
{
*lenp = 0;
@@ -639,7 +639,7 @@ final:
}
-static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info161719(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int i;
@@ -669,11 +669,11 @@ static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp
final:
*lenp = pos;
- retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ retv = proc_dostring(ctl, write, buffer, lenp, ppos);
return retv;
}
-static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_infotxRing(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int i;
@@ -698,11 +698,11 @@ static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp
SARLBNpln(u_char, txBuffer, 0x800);
final:
*lenp = pos;
- retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ retv = proc_dostring(ctl, write, buffer, lenp, ppos);
return retv;
}
-static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_inforxRing(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int i;
@@ -726,11 +726,11 @@ static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp
SARLBNpln(u_char, rxBuffer, 0x800);
final:
*lenp = pos;
- retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ retv = proc_dostring(ctl, write, buffer, lenp, ppos);
return retv;
}
-static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info18(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int i;
@@ -756,7 +756,7 @@ static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp,
final:
*lenp = pos;
- retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ retv = proc_dostring(ctl, write, buffer, lenp, ppos);
return retv;
}
@@ -766,7 +766,7 @@ final:
static char conf_reset_result[200];
-static int arlan_configure(ctl_table * ctl, int write, struct file *filp,
+static int arlan_configure(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int pos = 0;
@@ -788,10 +788,10 @@ static int arlan_configure(ctl_table * ctl, int write, struct file *filp,
return -1;
*lenp = pos;
- return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ return proc_dostring(ctl, write, buffer, lenp, ppos);
}
-static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_reset(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int pos = 0;
@@ -811,7 +811,7 @@ static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp,
} else
return -1;
*lenp = pos + 3;
- return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ return proc_dostring(ctl, write, buffer, lenp, ppos);
}
diff --git a/drivers/net/wireless/ath/ar9170/usb.c b/drivers/net/wireless/ath/ar9170/usb.c
index e0138ac8bf5..e974e5829e1 100644
--- a/drivers/net/wireless/ath/ar9170/usb.c
+++ b/drivers/net/wireless/ath/ar9170/usb.c
@@ -64,6 +64,8 @@ static struct usb_device_id ar9170_usb_ids[] = {
{ USB_DEVICE(0x0cf3, 0x9170) },
/* Atheros TG121N */
{ USB_DEVICE(0x0cf3, 0x1001) },
+ /* TP-Link TL-WN821N v2 */
+ { USB_DEVICE(0x0cf3, 0x1002) },
/* Cace Airpcap NX */
{ USB_DEVICE(0xcace, 0x0300) },
/* D-Link DWA 160A */
diff --git a/drivers/net/wireless/ath/ath9k/calib.c b/drivers/net/wireless/ath/ath9k/calib.c
index 3234995e888..0ad6d0b76e9 100644
--- a/drivers/net/wireless/ath/ath9k/calib.c
+++ b/drivers/net/wireless/ath/ath9k/calib.c
@@ -609,14 +609,24 @@ void ath9k_hw_loadnf(struct ath_hw *ah, struct ath9k_channel *chan)
AR_PHY_CH1_EXT_CCA,
AR_PHY_CH2_EXT_CCA
};
- u8 chainmask;
+ u8 chainmask, rx_chain_status;
+ rx_chain_status = REG_READ(ah, AR_PHY_RX_CHAINMASK);
if (AR_SREV_9285(ah))
chainmask = 0x9;
- else if (AR_SREV_9280(ah) || AR_SREV_9287(ah))
- chainmask = 0x1B;
- else
- chainmask = 0x3F;
+ else if (AR_SREV_9280(ah) || AR_SREV_9287(ah)) {
+ if ((rx_chain_status & 0x2) || (rx_chain_status & 0x4))
+ chainmask = 0x1B;
+ else
+ chainmask = 0x09;
+ } else {
+ if (rx_chain_status & 0x4)
+ chainmask = 0x3F;
+ else if (rx_chain_status & 0x2)
+ chainmask = 0x1B;
+ else
+ chainmask = 0x09;
+ }
h = ah->nfCalHist;
@@ -697,6 +707,8 @@ void ath9k_init_nfcal_hist_buffer(struct ath_hw *ah)
noise_floor = AR_PHY_CCA_MAX_AR9280_GOOD_VALUE;
else if (AR_SREV_9285(ah))
noise_floor = AR_PHY_CCA_MAX_AR9285_GOOD_VALUE;
+ else if (AR_SREV_9287(ah))
+ noise_floor = AR_PHY_CCA_MAX_AR9287_GOOD_VALUE;
else
noise_floor = AR_PHY_CCA_MAX_AR5416_GOOD_VALUE;
@@ -924,6 +936,7 @@ static inline void ath9k_hw_9285_pa_cal(struct ath_hw *ah, bool is_reset)
regVal |= (1 << (19 + i));
REG_WRITE(ah, 0x7834, regVal);
udelay(1);
+ regVal = REG_READ(ah, 0x7834);
regVal &= (~(0x1 << (19 + i)));
reg_field = MS(REG_READ(ah, 0x7840), AR9285_AN_RXTXBB1_SPARE9);
regVal |= (reg_field << (19 + i));
diff --git a/drivers/net/wireless/ath/ath9k/calib.h b/drivers/net/wireless/ath/ath9k/calib.h
index 019bcbba40e..9028ab193e4 100644
--- a/drivers/net/wireless/ath/ath9k/calib.h
+++ b/drivers/net/wireless/ath/ath9k/calib.h
@@ -28,6 +28,7 @@ extern const struct ath9k_percal_data adc_init_dc_cal;
#define AR_PHY_CCA_MAX_AR5416_GOOD_VALUE -85
#define AR_PHY_CCA_MAX_AR9280_GOOD_VALUE -112
#define AR_PHY_CCA_MAX_AR9285_GOOD_VALUE -118
+#define AR_PHY_CCA_MAX_AR9287_GOOD_VALUE -118
#define AR_PHY_CCA_MAX_HIGH_VALUE -62
#define AR_PHY_CCA_MIN_BAD_VALUE -140
#define AR_PHY_CCA_FILTERWINDOW_LENGTH_INIT 3
diff --git a/drivers/net/wireless/ath/ath9k/eeprom_def.c b/drivers/net/wireless/ath/ath9k/eeprom_def.c
index ae7fb5dcb26..4071fc91da0 100644
--- a/drivers/net/wireless/ath/ath9k/eeprom_def.c
+++ b/drivers/net/wireless/ath/ath9k/eeprom_def.c
@@ -509,6 +509,8 @@ static void ath9k_hw_def_set_board_values(struct ath_hw *ah,
REG_RMW_FIELD(ah, AR_AN_TOP1, AR_AN_TOP1_DACIPMODE,
eep->baseEepHeader.dacLpMode);
+ udelay(100);
+
REG_RMW_FIELD(ah, AR_PHY_FRAME_CTL, AR_PHY_FRAME_CTL_TX_CLIP,
pModal->miscBits >> 2);
@@ -902,7 +904,7 @@ static void ath9k_hw_set_def_power_per_rate_table(struct ath_hw *ah,
u16 powerLimit)
{
#define REDUCE_SCALED_POWER_BY_TWO_CHAIN 6 /* 10*log10(2)*2 */
-#define REDUCE_SCALED_POWER_BY_THREE_CHAIN 10 /* 10*log10(3)*2 */
+#define REDUCE_SCALED_POWER_BY_THREE_CHAIN 9 /* 10*log10(3)*2 */
struct ath_regulatory *regulatory = ath9k_hw_regulatory(ah);
struct ar5416_eeprom_def *pEepData = &ah->eeprom.def;
diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
index b6c6cca0781..ca7694caf36 100644
--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c
@@ -842,7 +842,7 @@ static void ath9k_hw_init_mode_regs(struct ath_hw *ah)
static void ath9k_hw_init_mode_gain_regs(struct ath_hw *ah)
{
- if (AR_SREV_9287_11(ah))
+ if (AR_SREV_9287_11_OR_LATER(ah))
INIT_INI_ARRAY(&ah->iniModesRxGain,
ar9287Modes_rx_gain_9287_1_1,
ARRAY_SIZE(ar9287Modes_rx_gain_9287_1_1), 6);
@@ -853,7 +853,7 @@ static void ath9k_hw_init_mode_gain_regs(struct ath_hw *ah)
else if (AR_SREV_9280_20(ah))
ath9k_hw_init_rxgain_ini(ah);
- if (AR_SREV_9287_11(ah)) {
+ if (AR_SREV_9287_11_OR_LATER(ah)) {
INIT_INI_ARRAY(&ah->iniModesTxGain,
ar9287Modes_tx_gain_9287_1_1,
ARRAY_SIZE(ar9287Modes_tx_gain_9287_1_1), 6);
@@ -965,7 +965,7 @@ int ath9k_hw_init(struct ath_hw *ah)
ath9k_hw_init_mode_regs(ah);
if (ah->is_pciexpress)
- ath9k_hw_configpcipowersave(ah, 0);
+ ath9k_hw_configpcipowersave(ah, 0, 0);
else
ath9k_hw_disablepcie(ah);
@@ -1273,6 +1273,15 @@ static void ath9k_hw_override_ini(struct ath_hw *ah,
*/
REG_SET_BIT(ah, AR_DIAG_SW, (AR_DIAG_RX_DIS | AR_DIAG_RX_ABORT));
+ if (AR_SREV_9280_10_OR_LATER(ah)) {
+ val = REG_READ(ah, AR_PCU_MISC_MODE2) &
+ (~AR_PCU_MISC_MODE2_HWWAR1);
+
+ if (AR_SREV_9287_10_OR_LATER(ah))
+ val = val & (~AR_PCU_MISC_MODE2_HWWAR2);
+
+ REG_WRITE(ah, AR_PCU_MISC_MODE2, val);
+ }
if (!AR_SREV_5416_20_OR_LATER(ah) ||
AR_SREV_9280_10_OR_LATER(ah))
@@ -1784,7 +1793,7 @@ static void ath9k_hw_set_regs(struct ath_hw *ah, struct ath9k_channel *chan,
static bool ath9k_hw_chip_reset(struct ath_hw *ah,
struct ath9k_channel *chan)
{
- if (OLC_FOR_AR9280_20_LATER) {
+ if (AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL)) {
if (!ath9k_hw_set_reset_reg(ah, ATH9K_RESET_POWER_ON))
return false;
} else if (!ath9k_hw_set_reset_reg(ah, ATH9K_RESET_WARM))
@@ -2338,6 +2347,7 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan,
struct ath9k_channel *curchan = ah->curchan;
u32 saveDefAntenna;
u32 macStaId1;
+ u64 tsf = 0;
int i, rx_chainmask, r;
ah->extprotspacing = sc->ht_extprotspacing;
@@ -2347,7 +2357,7 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan,
if (!ath9k_hw_setpower(ah, ATH9K_PM_AWAKE))
return -EIO;
- if (curchan)
+ if (curchan && !ah->chip_fullsleep)
ath9k_hw_getnf(ah, curchan);
if (bChannelChange &&
@@ -2356,8 +2366,8 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan,
(chan->channel != ah->curchan->channel) &&
((chan->channelFlags & CHANNEL_ALL) ==
(ah->curchan->channelFlags & CHANNEL_ALL)) &&
- (!AR_SREV_9280(ah) || (!IS_CHAN_A_5MHZ_SPACED(chan) &&
- !IS_CHAN_A_5MHZ_SPACED(ah->curchan)))) {
+ !(AR_SREV_9280(ah) || IS_CHAN_A_5MHZ_SPACED(chan) ||
+ IS_CHAN_A_5MHZ_SPACED(ah->curchan))) {
if (ath9k_hw_channel_change(ah, chan, sc->tx_chan_width)) {
ath9k_hw_loadnf(ah, ah->curchan);
@@ -2372,6 +2382,10 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan,
macStaId1 = REG_READ(ah, AR_STA_ID1) & AR_STA_ID1_BASE_RATE_11B;
+ /* For chips on which RTC reset is done, save TSF before it gets cleared */
+ if (AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL))
+ tsf = ath9k_hw_gettsf64(ah);
+
saveLedState = REG_READ(ah, AR_CFG_LED) &
(AR_CFG_LED_ASSOC_CTL | AR_CFG_LED_MODE_SEL |
AR_CFG_LED_BLINK_THRESH_SEL | AR_CFG_LED_BLINK_SLOW);
@@ -2398,6 +2412,10 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan,
udelay(50);
}
+ /* Restore TSF */
+ if (tsf && AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL))
+ ath9k_hw_settsf64(ah, tsf);
+
if (AR_SREV_9280_10_OR_LATER(ah))
REG_SET_BIT(ah, AR_GPIO_INPUT_EN_VAL, AR_GPIO_JTAG_DISABLE);
@@ -3005,9 +3023,10 @@ void ath9k_ps_restore(struct ath_softc *sc)
* Programming the SerDes must go through the same 288 bit serial shift
* register as the other analog registers. Hence the 9 writes.
*/
-void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore)
+void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore, int power_off)
{
u8 i;
+ u32 val;
if (ah->is_pciexpress != true)
return;
@@ -3017,84 +3036,113 @@ void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore)
return;
/* Nothing to do on restore for 11N */
- if (restore)
- return;
+ if (!restore) {
+ if (AR_SREV_9280_20_OR_LATER(ah)) {
+ /*
+ * AR9280 2.0 or later chips use SerDes values from the
+ * initvals.h initialized depending on chipset during
+ * ath9k_hw_init()
+ */
+ for (i = 0; i < ah->iniPcieSerdes.ia_rows; i++) {
+ REG_WRITE(ah, INI_RA(&ah->iniPcieSerdes, i, 0),
+ INI_RA(&ah->iniPcieSerdes, i, 1));
+ }
+ } else if (AR_SREV_9280(ah) &&
+ (ah->hw_version.macRev == AR_SREV_REVISION_9280_10)) {
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fd00);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
+
+ /* RX shut off when elecidle is asserted */
+ REG_WRITE(ah, AR_PCIE_SERDES, 0xa8000019);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x13160820);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980560);
+
+ /* Shut off CLKREQ active in L1 */
+ if (ah->config.pcie_clock_req)
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffc);
+ else
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffd);
- if (AR_SREV_9280_20_OR_LATER(ah)) {
- /*
- * AR9280 2.0 or later chips use SerDes values from the
- * initvals.h initialized depending on chipset during
- * ath9k_hw_init()
- */
- for (i = 0; i < ah->iniPcieSerdes.ia_rows; i++) {
- REG_WRITE(ah, INI_RA(&ah->iniPcieSerdes, i, 0),
- INI_RA(&ah->iniPcieSerdes, i, 1));
- }
- } else if (AR_SREV_9280(ah) &&
- (ah->hw_version.macRev == AR_SREV_REVISION_9280_10)) {
- REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fd00);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x00043007);
- /* RX shut off when elecidle is asserted */
- REG_WRITE(ah, AR_PCIE_SERDES, 0xa8000019);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x13160820);
- REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980560);
+ /* Load the new settings */
+ REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
- /* Shut off CLKREQ active in L1 */
- if (ah->config.pcie_clock_req)
- REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffc);
- else
- REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffd);
-
- REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
- REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x00043007);
+ } else {
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fc00);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
- /* Load the new settings */
- REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
+ /* RX shut off when elecidle is asserted */
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x28000039);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x53160824);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980579);
- } else {
- REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fc00);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
+ /*
+ * Ignore ah->ah_config.pcie_clock_req setting for
+ * pre-AR9280 11n
+ */
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x001defff);
- /* RX shut off when elecidle is asserted */
- REG_WRITE(ah, AR_PCIE_SERDES, 0x28000039);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x53160824);
- REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980579);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x000e3007);
- /*
- * Ignore ah->ah_config.pcie_clock_req setting for
- * pre-AR9280 11n
- */
- REG_WRITE(ah, AR_PCIE_SERDES, 0x001defff);
+ /* Load the new settings */
+ REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
+ }
- REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
- REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x000e3007);
+ udelay(1000);
- /* Load the new settings */
- REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
- }
+ /* set bit 19 to allow forcing of pcie core into L1 state */
+ REG_SET_BIT(ah, AR_PCIE_PM_CTRL, AR_PCIE_PM_CTRL_ENA);
- udelay(1000);
+ /* Several PCIe massages to ensure proper behaviour */
+ if (ah->config.pcie_waen) {
+ val = ah->config.pcie_waen;
+ if (!power_off)
+ val &= (~AR_WA_D3_L1_DISABLE);
+ } else {
+ if (AR_SREV_9285(ah) || AR_SREV_9271(ah) ||
+ AR_SREV_9287(ah)) {
+ val = AR9285_WA_DEFAULT;
+ if (!power_off)
+ val &= (~AR_WA_D3_L1_DISABLE);
+ } else if (AR_SREV_9280(ah)) {
+ /*
+ * On AR9280 chips bit 22 of 0x4004 needs to be
+ * set otherwise card may disappear.
+ */
+ val = AR9280_WA_DEFAULT;
+ if (!power_off)
+ val &= (~AR_WA_D3_L1_DISABLE);
+ } else
+ val = AR_WA_DEFAULT;
+ }
- /* set bit 19 to allow forcing of pcie core into L1 state */
- REG_SET_BIT(ah, AR_PCIE_PM_CTRL, AR_PCIE_PM_CTRL_ENA);
+ REG_WRITE(ah, AR_WA, val);
+ }
- /* Several PCIe massages to ensure proper behaviour */
- if (ah->config.pcie_waen) {
- REG_WRITE(ah, AR_WA, ah->config.pcie_waen);
- } else {
- if (AR_SREV_9285(ah) || AR_SREV_9271(ah) || AR_SREV_9287(ah))
- REG_WRITE(ah, AR_WA, AR9285_WA_DEFAULT);
+ if (power_off) {
/*
- * On AR9280 chips bit 22 of 0x4004 needs to be set to
- * otherwise card may disappear.
+ * Set PCIe workaround bits
+ * bit 14 in WA register (disable L1) should only
+ * be set when device enters D3 and be cleared
+ * when device comes back to D0.
*/
- else if (AR_SREV_9280(ah))
- REG_WRITE(ah, AR_WA, AR9280_WA_DEFAULT);
- else
- REG_WRITE(ah, AR_WA, AR_WA_DEFAULT);
+ if (ah->config.pcie_waen) {
+ if (ah->config.pcie_waen & AR_WA_D3_L1_DISABLE)
+ REG_SET_BIT(ah, AR_WA, AR_WA_D3_L1_DISABLE);
+ } else {
+ if (((AR_SREV_9285(ah) || AR_SREV_9271(ah) ||
+ AR_SREV_9287(ah)) &&
+ (AR9285_WA_DEFAULT & AR_WA_D3_L1_DISABLE)) ||
+ (AR_SREV_9280(ah) &&
+ (AR9280_WA_DEFAULT & AR_WA_D3_L1_DISABLE))) {
+ REG_SET_BIT(ah, AR_WA, AR_WA_D3_L1_DISABLE);
+ }
+ }
}
}
@@ -3652,15 +3700,7 @@ void ath9k_hw_fill_cap_info(struct ath_hw *ah)
}
#endif
- if ((ah->hw_version.macVersion == AR_SREV_VERSION_5416_PCI) ||
- (ah->hw_version.macVersion == AR_SREV_VERSION_5416_PCIE) ||
- (ah->hw_version.macVersion == AR_SREV_VERSION_9160) ||
- (ah->hw_version.macVersion == AR_SREV_VERSION_9100) ||
- (ah->hw_version.macVersion == AR_SREV_VERSION_9280) ||
- (ah->hw_version.macVersion == AR_SREV_VERSION_9285))
- pCap->hw_caps &= ~ATH9K_HW_CAP_AUTOSLEEP;
- else
- pCap->hw_caps |= ATH9K_HW_CAP_AUTOSLEEP;
+ pCap->hw_caps &= ~ATH9K_HW_CAP_AUTOSLEEP;
if (AR_SREV_9280(ah) || AR_SREV_9285(ah))
pCap->hw_caps &= ~ATH9K_HW_CAP_4KB_SPLITTRANS;
diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h
index 9106a0b537d..b8923457182 100644
--- a/drivers/net/wireless/ath/ath9k/hw.h
+++ b/drivers/net/wireless/ath/ath9k/hw.h
@@ -106,7 +106,7 @@
#define AH_TSF_WRITE_TIMEOUT 100 /* (us) */
#define AH_TIME_QUANTUM 10
#define AR_KEYTABLE_SIZE 128
-#define POWER_UP_TIME 200000
+#define POWER_UP_TIME 10000
#define SPUR_RSSI_THRESH 40
#define CAB_TIMEOUT_VAL 10
@@ -650,7 +650,7 @@ void ath9k_hw_set_sta_beacon_timers(struct ath_hw *ah,
const struct ath9k_beacon_state *bs);
bool ath9k_hw_setpower(struct ath_hw *ah,
enum ath9k_power_mode mode);
-void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore);
+void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore, int power_off);
/* Interrupt Handling */
bool ath9k_hw_intrpend(struct ath_hw *ah);
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 3dc7b5a13e6..52bed89063d 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -1131,7 +1131,7 @@ void ath_radio_enable(struct ath_softc *sc)
int r;
ath9k_ps_wakeup(sc);
- ath9k_hw_configpcipowersave(ah, 0);
+ ath9k_hw_configpcipowersave(ah, 0, 0);
if (!ah->curchan)
ah->curchan = ath_get_curchannel(sc, sc->hw);
@@ -1202,7 +1202,7 @@ void ath_radio_disable(struct ath_softc *sc)
spin_unlock_bh(&sc->sc_resetlock);
ath9k_hw_phy_disable(ah);
- ath9k_hw_configpcipowersave(ah, 1);
+ ath9k_hw_configpcipowersave(ah, 1, 1);
ath9k_ps_restore(sc);
ath9k_hw_setpower(ah, ATH9K_PM_FULL_SLEEP);
}
@@ -1226,11 +1226,6 @@ static void ath9k_rfkill_poll_state(struct ieee80211_hw *hw)
bool blocked = !!ath_is_rfkill_set(sc);
wiphy_rfkill_set_hw_state(hw->wiphy, blocked);
-
- if (blocked)
- ath_radio_disable(sc);
- else
- ath_radio_enable(sc);
}
static void ath_start_rfkill_poll(struct ath_softc *sc)
@@ -1260,6 +1255,7 @@ void ath_detach(struct ath_softc *sc)
DPRINTF(sc, ATH_DBG_CONFIG, "Detach ATH hw\n");
ath_deinit_leds(sc);
+ wiphy_rfkill_stop_polling(sc->hw->wiphy);
for (i = 0; i < sc->num_sec_wiphy; i++) {
struct ath_wiphy *aphy = sc->sec_wiphy[i];
@@ -1942,7 +1938,7 @@ static int ath9k_start(struct ieee80211_hw *hw)
init_channel = ath_get_curchannel(sc, hw);
/* Reset SERDES registers */
- ath9k_hw_configpcipowersave(sc->sc_ah, 0);
+ ath9k_hw_configpcipowersave(sc->sc_ah, 0, 0);
/*
* The basic interface to setting the hardware in a good
@@ -2166,11 +2162,9 @@ static void ath9k_stop(struct ieee80211_hw *hw)
} else
sc->rx.rxlink = NULL;
- wiphy_rfkill_stop_polling(sc->hw->wiphy);
-
/* disable HAL and put h/w to sleep */
ath9k_hw_disable(sc->sc_ah);
- ath9k_hw_configpcipowersave(sc->sc_ah, 1);
+ ath9k_hw_configpcipowersave(sc->sc_ah, 1, 1);
ath9k_hw_setpower(sc->sc_ah, ATH9K_PM_FULL_SLEEP);
sc->sc_flags |= SC_OP_INVALID;
diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h
index e5c29eb86e8..d83b77f821e 100644
--- a/drivers/net/wireless/ath/ath9k/reg.h
+++ b/drivers/net/wireless/ath/ath9k/reg.h
@@ -676,8 +676,9 @@
#define AR_RC_HOSTIF 0x00000100
#define AR_WA 0x4004
+#define AR_WA_D3_L1_DISABLE (1 << 14)
#define AR9285_WA_DEFAULT 0x004a05cb
-#define AR9280_WA_DEFAULT 0x0040073f
+#define AR9280_WA_DEFAULT 0x0040073b
#define AR_WA_DEFAULT 0x0000073f
diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig
index 83e38134acc..54ea61c15d8 100644
--- a/drivers/net/wireless/b43/Kconfig
+++ b/drivers/net/wireless/b43/Kconfig
@@ -61,11 +61,28 @@ config B43_PCMCIA
If unsure, say N.
+config B43_SDIO
+ bool "Broadcom 43xx SDIO device support (EXPERIMENTAL)"
+ depends on B43 && SSB_SDIOHOST_POSSIBLE && EXPERIMENTAL
+ select SSB_SDIOHOST
+ ---help---
+ Broadcom 43xx device support for Soft-MAC SDIO devices.
+
+ With this config option you can drive Soft-MAC b43 cards with a
+ Secure Digital I/O interface.
+ This includes the WLAN daughter card found on the Nintendo Wii
+ video game console.
+ Note that this does not support Broadcom 43xx Full-MAC devices.
+
+ It's safe to select Y here, even if you don't have a B43 SDIO device.
+
+ If unsure, say N.
+
# Data transfers to the device via PIO
-# This is only needed on PCMCIA devices. All others can do DMA properly.
+# This is only needed on PCMCIA and SDIO devices. All others can do DMA properly.
config B43_PIO
bool
- depends on B43 && (B43_PCMCIA || B43_FORCE_PIO)
+ depends on B43 && (B43_SDIO || B43_PCMCIA || B43_FORCE_PIO)
select SSB_BLOCKIO
default y
diff --git a/drivers/net/wireless/b43/Makefile b/drivers/net/wireless/b43/Makefile
index da379f4b0c3..84772a2542d 100644
--- a/drivers/net/wireless/b43/Makefile
+++ b/drivers/net/wireless/b43/Makefile
@@ -16,6 +16,7 @@ b43-$(CONFIG_B43_PIO) += pio.o
b43-y += rfkill.o
b43-$(CONFIG_B43_LEDS) += leds.o
b43-$(CONFIG_B43_PCMCIA) += pcmcia.o
+b43-$(CONFIG_B43_SDIO) += sdio.o
b43-$(CONFIG_B43_DEBUG) += debugfs.o
obj-$(CONFIG_B43) += b43.o
diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h
index 09cfe68537b..fa1549a03c7 100644
--- a/drivers/net/wireless/b43/b43.h
+++ b/drivers/net/wireless/b43/b43.h
@@ -629,13 +629,6 @@ struct b43_wl {
* from the mac80211 subsystem. */
u16 mac80211_initially_registered_queues;
- /* R/W lock for data transmission.
- * Transmissions on 2+ queues can run concurrently, but somebody else
- * might sync with TX by write_lock_irqsave()'ing. */
- rwlock_t tx_lock;
- /* Lock for LEDs access. */
- spinlock_t leds_lock;
-
/* We can only have one operating interface (802.11 core)
* at a time. General information about this interface follows.
*/
@@ -686,6 +679,9 @@ struct b43_wl {
struct work_struct tx_work;
/* Queue of packets to be transmitted. */
struct sk_buff_head tx_queue;
+
+ /* The device LEDs. */
+ struct b43_leds leds;
};
/* The type of the firmware file. */
@@ -768,13 +764,10 @@ struct b43_wldev {
/* The device initialization status.
* Use b43_status() to query. */
atomic_t __init_status;
- /* Saved init status for handling suspend. */
- int suspend_init_status;
bool bad_frames_preempt; /* Use "Bad Frames Preemption" (default off) */
bool dfq_valid; /* Directed frame queue valid (IBSS PS mode, ATIM) */
bool radio_hw_enable; /* saved state of radio hardware enabled state */
- bool suspend_in_progress; /* TRUE, if we are in a suspend/resume cycle */
bool qos_enabled; /* TRUE, if QoS is used. */
bool hwcrypto_enabled; /* TRUE, if HW crypto acceleration is enabled. */
@@ -794,12 +787,6 @@ struct b43_wldev {
/* Various statistics about the physical device. */
struct b43_stats stats;
- /* The device LEDs. */
- struct b43_led led_tx;
- struct b43_led led_rx;
- struct b43_led led_assoc;
- struct b43_led led_radio;
-
/* Reason code of the last interrupt. */
u32 irq_reason;
u32 dma_reason[6];
@@ -830,6 +817,10 @@ struct b43_wldev {
/* Debugging stuff follows. */
#ifdef CONFIG_B43_DEBUG
struct b43_dfsentry *dfsentry;
+ unsigned int irq_count;
+ unsigned int irq_bit_count[32];
+ unsigned int tx_count;
+ unsigned int rx_count;
#endif
};
diff --git a/drivers/net/wireless/b43/debugfs.c b/drivers/net/wireless/b43/debugfs.c
index 8f64943e3f6..80b19a44a40 100644
--- a/drivers/net/wireless/b43/debugfs.c
+++ b/drivers/net/wireless/b43/debugfs.c
@@ -689,6 +689,7 @@ static void b43_add_dynamic_debug(struct b43_wldev *dev)
add_dyn_dbg("debug_lo", B43_DBG_LO, 0);
add_dyn_dbg("debug_firmware", B43_DBG_FIRMWARE, 0);
add_dyn_dbg("debug_keys", B43_DBG_KEYS, 0);
+ add_dyn_dbg("debug_verbose_stats", B43_DBG_VERBOSESTATS, 0);
#undef add_dyn_dbg
}
diff --git a/drivers/net/wireless/b43/debugfs.h b/drivers/net/wireless/b43/debugfs.h
index e47b4b488b0..822aad8842f 100644
--- a/drivers/net/wireless/b43/debugfs.h
+++ b/drivers/net/wireless/b43/debugfs.h
@@ -13,6 +13,7 @@ enum b43_dyndbg { /* Dynamic debugging features */
B43_DBG_LO,
B43_DBG_FIRMWARE,
B43_DBG_KEYS,
+ B43_DBG_VERBOSESTATS,
__B43_NR_DYNDBG,
};
diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index a467ee260a1..8701034569f 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -1428,9 +1428,9 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
ring->nr_failed_tx_packets++;
ring->nr_total_packet_tries += status->frame_count;
#endif /* DEBUG */
- ieee80211_tx_status_irqsafe(dev->wl->hw, meta->skb);
+ ieee80211_tx_status(dev->wl->hw, meta->skb);
- /* skb is freed by ieee80211_tx_status_irqsafe() */
+ /* skb is freed by ieee80211_tx_status() */
meta->skb = NULL;
} else {
/* No need to call free_descriptor_buffer here, as
diff --git a/drivers/net/wireless/b43/leds.c b/drivers/net/wireless/b43/leds.c
index c8b317094c3..fbe3d4f62ce 100644
--- a/drivers/net/wireless/b43/leds.c
+++ b/drivers/net/wireless/b43/leds.c
@@ -34,57 +34,88 @@
static void b43_led_turn_on(struct b43_wldev *dev, u8 led_index,
bool activelow)
{
- struct b43_wl *wl = dev->wl;
- unsigned long flags;
u16 ctl;
- spin_lock_irqsave(&wl->leds_lock, flags);
ctl = b43_read16(dev, B43_MMIO_GPIO_CONTROL);
if (activelow)
ctl &= ~(1 << led_index);
else
ctl |= (1 << led_index);
b43_write16(dev, B43_MMIO_GPIO_CONTROL, ctl);
- spin_unlock_irqrestore(&wl->leds_lock, flags);
}
static void b43_led_turn_off(struct b43_wldev *dev, u8 led_index,
bool activelow)
{
- struct b43_wl *wl = dev->wl;
- unsigned long flags;
u16 ctl;
- spin_lock_irqsave(&wl->leds_lock, flags);
ctl = b43_read16(dev, B43_MMIO_GPIO_CONTROL);
if (activelow)
ctl |= (1 << led_index);
else
ctl &= ~(1 << led_index);
b43_write16(dev, B43_MMIO_GPIO_CONTROL, ctl);
- spin_unlock_irqrestore(&wl->leds_lock, flags);
}
-/* Callback from the LED subsystem. */
-static void b43_led_brightness_set(struct led_classdev *led_dev,
- enum led_brightness brightness)
+static void b43_led_update(struct b43_wldev *dev,
+ struct b43_led *led)
{
- struct b43_led *led = container_of(led_dev, struct b43_led, led_dev);
- struct b43_wldev *dev = led->dev;
bool radio_enabled;
+ bool turn_on;
- if (unlikely(b43_status(dev) < B43_STAT_INITIALIZED))
+ if (!led->wl)
return;
- /* Checking the radio-enabled status here is slightly racy,
- * but we want to avoid the locking overhead and we don't care
- * whether the LED has the wrong state for a second. */
radio_enabled = (dev->phy.radio_on && dev->radio_hw_enable);
- if (brightness == LED_OFF || !radio_enabled)
- b43_led_turn_off(dev, led->index, led->activelow);
+ /* The led->state read is racy, but we don't care. In case we raced
+ * with the brightness_set handler, we will be called again soon
+ * to fixup our state. */
+ if (radio_enabled)
+ turn_on = atomic_read(&led->state) != LED_OFF;
else
+ turn_on = 0;
+ if (turn_on == led->hw_state)
+ return;
+ led->hw_state = turn_on;
+
+ if (turn_on)
b43_led_turn_on(dev, led->index, led->activelow);
+ else
+ b43_led_turn_off(dev, led->index, led->activelow);
+}
+
+static void b43_leds_work(struct work_struct *work)
+{
+ struct b43_leds *leds = container_of(work, struct b43_leds, work);
+ struct b43_wl *wl = container_of(leds, struct b43_wl, leds);
+ struct b43_wldev *dev;
+
+ mutex_lock(&wl->mutex);
+ dev = wl->current_dev;
+ if (unlikely(!dev || b43_status(dev) < B43_STAT_STARTED))
+ goto out_unlock;
+
+ b43_led_update(dev, &wl->leds.led_tx);
+ b43_led_update(dev, &wl->leds.led_rx);
+ b43_led_update(dev, &wl->leds.led_radio);
+ b43_led_update(dev, &wl->leds.led_assoc);
+
+out_unlock:
+ mutex_unlock(&wl->mutex);
+}
+
+/* Callback from the LED subsystem. */
+static void b43_led_brightness_set(struct led_classdev *led_dev,
+ enum led_brightness brightness)
+{
+ struct b43_led *led = container_of(led_dev, struct b43_led, led_dev);
+ struct b43_wl *wl = led->wl;
+
+ if (likely(!wl->leds.stop)) {
+ atomic_set(&led->state, brightness);
+ ieee80211_queue_work(wl->hw, &wl->leds.work);
+ }
}
static int b43_register_led(struct b43_wldev *dev, struct b43_led *led,
@@ -93,15 +124,15 @@ static int b43_register_led(struct b43_wldev *dev, struct b43_led *led,
{
int err;
- b43_led_turn_off(dev, led_index, activelow);
- if (led->dev)
+ if (led->wl)
return -EEXIST;
if (!default_trigger)
return -EINVAL;
- led->dev = dev;
+ led->wl = dev->wl;
led->index = led_index;
led->activelow = activelow;
strncpy(led->name, name, sizeof(led->name));
+ atomic_set(&led->state, 0);
led->led_dev.name = led->name;
led->led_dev.default_trigger = default_trigger;
@@ -110,19 +141,19 @@ static int b43_register_led(struct b43_wldev *dev, struct b43_led *led,
err = led_classdev_register(dev->dev->dev, &led->led_dev);
if (err) {
b43warn(dev->wl, "LEDs: Failed to register %s\n", name);
- led->dev = NULL;
+ led->wl = NULL;
return err;
}
+
return 0;
}
static void b43_unregister_led(struct b43_led *led)
{
- if (!led->dev)
+ if (!led->wl)
return;
led_classdev_unregister(&led->led_dev);
- b43_led_turn_off(led->dev, led->index, led->activelow);
- led->dev = NULL;
+ led->wl = NULL;
}
static void b43_map_led(struct b43_wldev *dev,
@@ -137,24 +168,20 @@ static void b43_map_led(struct b43_wldev *dev,
* generic LED triggers. */
switch (behaviour) {
case B43_LED_INACTIVE:
- break;
case B43_LED_OFF:
- b43_led_turn_off(dev, led_index, activelow);
- break;
case B43_LED_ON:
- b43_led_turn_on(dev, led_index, activelow);
break;
case B43_LED_ACTIVITY:
case B43_LED_TRANSFER:
case B43_LED_APTRANSFER:
snprintf(name, sizeof(name),
"b43-%s::tx", wiphy_name(hw->wiphy));
- b43_register_led(dev, &dev->led_tx, name,
+ b43_register_led(dev, &dev->wl->leds.led_tx, name,
ieee80211_get_tx_led_name(hw),
led_index, activelow);
snprintf(name, sizeof(name),
"b43-%s::rx", wiphy_name(hw->wiphy));
- b43_register_led(dev, &dev->led_rx, name,
+ b43_register_led(dev, &dev->wl->leds.led_rx, name,
ieee80211_get_rx_led_name(hw),
led_index, activelow);
break;
@@ -164,18 +191,15 @@ static void b43_map_led(struct b43_wldev *dev,
case B43_LED_MODE_BG:
snprintf(name, sizeof(name),
"b43-%s::radio", wiphy_name(hw->wiphy));
- b43_register_led(dev, &dev->led_radio, name,
+ b43_register_led(dev, &dev->wl->leds.led_radio, name,
ieee80211_get_radio_led_name(hw),
led_index, activelow);
- /* Sync the RF-kill LED state with radio and switch states. */
- if (dev->phy.radio_on && b43_is_hw_radio_enabled(dev))
- b43_led_turn_on(dev, led_index, activelow);
break;
case B43_LED_WEIRD:
case B43_LED_ASSOC:
snprintf(name, sizeof(name),
"b43-%s::assoc", wiphy_name(hw->wiphy));
- b43_register_led(dev, &dev->led_assoc, name,
+ b43_register_led(dev, &dev->wl->leds.led_assoc, name,
ieee80211_get_assoc_led_name(hw),
led_index, activelow);
break;
@@ -186,58 +210,150 @@ static void b43_map_led(struct b43_wldev *dev,
}
}
-void b43_leds_init(struct b43_wldev *dev)
+static void b43_led_get_sprominfo(struct b43_wldev *dev,
+ unsigned int led_index,
+ enum b43_led_behaviour *behaviour,
+ bool *activelow)
{
struct ssb_bus *bus = dev->dev->bus;
u8 sprom[4];
- int i;
- enum b43_led_behaviour behaviour;
- bool activelow;
sprom[0] = bus->sprom.gpio0;
sprom[1] = bus->sprom.gpio1;
sprom[2] = bus->sprom.gpio2;
sprom[3] = bus->sprom.gpio3;
- for (i = 0; i < 4; i++) {
- if (sprom[i] == 0xFF) {
- /* There is no LED information in the SPROM
- * for this LED. Hardcode it here. */
- activelow = 0;
- switch (i) {
- case 0:
- behaviour = B43_LED_ACTIVITY;
- activelow = 1;
- if (bus->boardinfo.vendor == PCI_VENDOR_ID_COMPAQ)
- behaviour = B43_LED_RADIO_ALL;
- break;
- case 1:
- behaviour = B43_LED_RADIO_B;
- if (bus->boardinfo.vendor == PCI_VENDOR_ID_ASUSTEK)
- behaviour = B43_LED_ASSOC;
- break;
- case 2:
- behaviour = B43_LED_RADIO_A;
- break;
- case 3:
- behaviour = B43_LED_OFF;
- break;
- default:
- B43_WARN_ON(1);
- return;
- }
+ if (sprom[led_index] == 0xFF) {
+ /* There is no LED information in the SPROM
+ * for this LED. Hardcode it here. */
+ *activelow = 0;
+ switch (led_index) {
+ case 0:
+ *behaviour = B43_LED_ACTIVITY;
+ *activelow = 1;
+ if (bus->boardinfo.vendor == PCI_VENDOR_ID_COMPAQ)
+ *behaviour = B43_LED_RADIO_ALL;
+ break;
+ case 1:
+ *behaviour = B43_LED_RADIO_B;
+ if (bus->boardinfo.vendor == PCI_VENDOR_ID_ASUSTEK)
+ *behaviour = B43_LED_ASSOC;
+ break;
+ case 2:
+ *behaviour = B43_LED_RADIO_A;
+ break;
+ case 3:
+ *behaviour = B43_LED_OFF;
+ break;
+ default:
+ B43_WARN_ON(1);
+ return;
+ }
+ } else {
+ *behaviour = sprom[led_index] & B43_LED_BEHAVIOUR;
+ *activelow = !!(sprom[led_index] & B43_LED_ACTIVELOW);
+ }
+}
+
+void b43_leds_init(struct b43_wldev *dev)
+{
+ struct b43_led *led;
+ unsigned int i;
+ enum b43_led_behaviour behaviour;
+ bool activelow;
+
+ /* Sync the RF-kill LED state (if we have one) with radio and switch states. */
+ led = &dev->wl->leds.led_radio;
+ if (led->wl) {
+ if (dev->phy.radio_on && b43_is_hw_radio_enabled(dev)) {
+ b43_led_turn_on(dev, led->index, led->activelow);
+ led->hw_state = 1;
+ atomic_set(&led->state, 1);
} else {
- behaviour = sprom[i] & B43_LED_BEHAVIOUR;
- activelow = !!(sprom[i] & B43_LED_ACTIVELOW);
+ b43_led_turn_off(dev, led->index, led->activelow);
+ led->hw_state = 0;
+ atomic_set(&led->state, 0);
}
- b43_map_led(dev, i, behaviour, activelow);
}
+
+ /* Initialize TX/RX/ASSOC leds */
+ led = &dev->wl->leds.led_tx;
+ if (led->wl) {
+ b43_led_turn_off(dev, led->index, led->activelow);
+ led->hw_state = 0;
+ atomic_set(&led->state, 0);
+ }
+ led = &dev->wl->leds.led_rx;
+ if (led->wl) {
+ b43_led_turn_off(dev, led->index, led->activelow);
+ led->hw_state = 0;
+ atomic_set(&led->state, 0);
+ }
+ led = &dev->wl->leds.led_assoc;
+ if (led->wl) {
+ b43_led_turn_off(dev, led->index, led->activelow);
+ led->hw_state = 0;
+ atomic_set(&led->state, 0);
+ }
+
+ /* Initialize other LED states. */
+ for (i = 0; i < B43_MAX_NR_LEDS; i++) {
+ b43_led_get_sprominfo(dev, i, &behaviour, &activelow);
+ switch (behaviour) {
+ case B43_LED_OFF:
+ b43_led_turn_off(dev, i, activelow);
+ break;
+ case B43_LED_ON:
+ b43_led_turn_on(dev, i, activelow);
+ break;
+ default:
+ /* Leave others as-is. */
+ break;
+ }
+ }
+
+ dev->wl->leds.stop = 0;
}
void b43_leds_exit(struct b43_wldev *dev)
{
- b43_unregister_led(&dev->led_tx);
- b43_unregister_led(&dev->led_rx);
- b43_unregister_led(&dev->led_assoc);
- b43_unregister_led(&dev->led_radio);
+ struct b43_leds *leds = &dev->wl->leds;
+
+ b43_led_turn_off(dev, leds->led_tx.index, leds->led_tx.activelow);
+ b43_led_turn_off(dev, leds->led_rx.index, leds->led_rx.activelow);
+ b43_led_turn_off(dev, leds->led_assoc.index, leds->led_assoc.activelow);
+ b43_led_turn_off(dev, leds->led_radio.index, leds->led_radio.activelow);
+}
+
+/* Stop LED processing: first set the stop flag so the brightness_set
+ * callback queues no further work, then cancel the work item and wait
+ * for a running instance to finish. */
+void b43_leds_stop(struct b43_wldev *dev)
+{
+	struct b43_wl *wl = dev->wl;
+
+	wl->leds.stop = 1;
+	cancel_work_sync(&wl->leds.work);
+}
+
+/* Initialize the LED work item and register each of the
+ * B43_MAX_NR_LEDS LEDs with the LED subsystem, using the behaviour
+ * and polarity reported by b43_led_get_sprominfo(). */
+void b43_leds_register(struct b43_wldev *dev)
+{
+	enum b43_led_behaviour behaviour;
+	bool activelow;
+	unsigned int led_index;
+
+	INIT_WORK(&dev->wl->leds.work, b43_leds_work);
+
+	for (led_index = 0; led_index < B43_MAX_NR_LEDS; led_index++) {
+		b43_led_get_sprominfo(dev, led_index, &behaviour, &activelow);
+		b43_map_led(dev, led_index, behaviour, activelow);
+	}
+}
+
+void b43_leds_unregister(struct b43_wldev *dev)
+{
+ struct b43_leds *leds = &dev->wl->leds;
+
+ b43_unregister_led(&leds->led_tx);
+ b43_unregister_led(&leds->led_rx);
+ b43_unregister_led(&leds->led_assoc);
+ b43_unregister_led(&leds->led_radio);
}
diff --git a/drivers/net/wireless/b43/leds.h b/drivers/net/wireless/b43/leds.h
index b8b1dd52124..9592e4c5a5f 100644
--- a/drivers/net/wireless/b43/leds.h
+++ b/drivers/net/wireless/b43/leds.h
@@ -7,12 +7,13 @@ struct b43_wldev;
#include <linux/types.h>
#include <linux/leds.h>
+#include <linux/workqueue.h>
#define B43_LED_MAX_NAME_LEN 31
struct b43_led {
- struct b43_wldev *dev;
+ struct b43_wl *wl;
/* The LED class device */
struct led_classdev led_dev;
/* The index number of the LED. */
@@ -22,8 +23,24 @@ struct b43_led {
bool activelow;
/* The unique name string for this LED device. */
char name[B43_LED_MAX_NAME_LEN + 1];
+ /* The current status of the LED. This is updated locklessly. */
+ atomic_t state;
+ /* The active state in hardware. */
+ bool hw_state;
};
+/* LED state shared by the whole wireless device (embedded in struct b43_wl). */
+struct b43_leds {
+ struct b43_led led_tx;
+ struct b43_led led_rx;
+ struct b43_led led_radio;
+ struct b43_led led_assoc;
+
+ /* While set, the brightness_set callback does not queue LED work. */
+ bool stop;
+ /* Deferred work that writes the requested LED states to the hardware. */
+ struct work_struct work;
+};
+
+#define B43_MAX_NR_LEDS 4
+
#define B43_LED_BEHAVIOUR 0x7F
#define B43_LED_ACTIVELOW 0x80
/* LED behaviour values */
@@ -42,23 +59,35 @@ enum b43_led_behaviour {
B43_LED_INACTIVE,
};
+void b43_leds_register(struct b43_wldev *dev);
+void b43_leds_unregister(struct b43_wldev *dev);
void b43_leds_init(struct b43_wldev *dev);
void b43_leds_exit(struct b43_wldev *dev);
+void b43_leds_stop(struct b43_wldev *dev);
#else /* CONFIG_B43_LEDS */
/* LED support disabled */
-struct b43_led {
+struct b43_leds {
/* empty */
};
+static inline void b43_leds_register(struct b43_wldev *dev)
+{
+}
+static inline void b43_leds_unregister(struct b43_wldev *dev)
+{
+}
static inline void b43_leds_init(struct b43_wldev *dev)
{
}
static inline void b43_leds_exit(struct b43_wldev *dev)
{
}
+static inline void b43_leds_stop(struct b43_wldev *dev)
+{
+}
#endif /* CONFIG_B43_LEDS */
#endif /* B43_LEDS_H_ */
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index e789792a36b..9b907a36bb8 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -8,6 +8,9 @@
Copyright (c) 2005 Danny van Dyk <kugelfang@gentoo.org>
Copyright (c) 2005 Andreas Jaggi <andreas.jaggi@waterwave.ch>
+ SDIO support
+ Copyright (c) 2009 Albert Herranz <albert_herranz@yahoo.es>
+
Some parts of the code in this file are derived from the ipw2200
driver Copyright(c) 2003 - 2004 Intel Corporation.
@@ -53,6 +56,8 @@
#include "xmit.h"
#include "lo.h"
#include "pcmcia.h"
+#include "sdio.h"
+#include <linux/mmc/sdio_func.h>
MODULE_DESCRIPTION("Broadcom B43 wireless driver");
MODULE_AUTHOR("Martin Langer");
@@ -1587,7 +1592,7 @@ static void b43_beacon_update_trigger_work(struct work_struct *work)
mutex_lock(&wl->mutex);
dev = wl->current_dev;
if (likely(dev && (b43_status(dev) >= B43_STAT_INITIALIZED))) {
- if (0 /*FIXME dev->dev->bus->bustype == SSB_BUSTYPE_SDIO*/) {
+ if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
/* wl->mutex is enough. */
b43_do_beacon_update_trigger_work(dev);
mmiowb();
@@ -1825,6 +1830,16 @@ static void b43_do_interrupt_thread(struct b43_wldev *dev)
/* Re-enable interrupts on the device by restoring the current interrupt mask. */
b43_write32(dev, B43_MMIO_GEN_IRQ_MASK, dev->irq_mask);
+
+#if B43_DEBUG
+ if (b43_debug(dev, B43_DBG_VERBOSESTATS)) {
+ dev->irq_count++;
+ for (i = 0; i < ARRAY_SIZE(dev->irq_bit_count); i++) {
+ if (reason & (1 << i))
+ dev->irq_bit_count[i]++;
+ }
+ }
+#endif
}
/* Interrupt thread handler. Handles device interrupts in thread context. */
@@ -1905,6 +1920,21 @@ static irqreturn_t b43_interrupt_handler(int irq, void *dev_id)
return ret;
}
+/* SDIO interrupt handler. This runs in process context.
+ * The first-stage handler and, when it asks for it, the threaded
+ * second stage are run back to back under wl->mutex. */
+static void b43_sdio_interrupt_handler(struct b43_wldev *dev)
+{
+	struct b43_wl *wl = dev->wl;
+
+	mutex_lock(&wl->mutex);
+	if (b43_do_interrupt(dev) == IRQ_WAKE_THREAD)
+		b43_do_interrupt_thread(dev);
+	mutex_unlock(&wl->mutex);
+}
+
void b43_do_release_fw(struct b43_firmware_file *fw)
{
release_firmware(fw->data);
@@ -2645,6 +2675,20 @@ static void b43_adjust_opmode(struct b43_wldev *dev)
cfp_pretbtt = 50;
}
b43_write16(dev, 0x612, cfp_pretbtt);
+
+ /* FIXME: We don't currently implement the PMQ mechanism,
+ * so always disable it. If we want to implement PMQ,
+ * we need to enable it here (clear DISCPMQ) in AP mode.
+ */
+ if (0 /* ctl & B43_MACCTL_AP */) {
+ b43_write32(dev, B43_MMIO_MACCTL,
+ b43_read32(dev, B43_MMIO_MACCTL)
+ & ~B43_MACCTL_DISCPMQ);
+ } else {
+ b43_write32(dev, B43_MMIO_MACCTL,
+ b43_read32(dev, B43_MMIO_MACCTL)
+ | B43_MACCTL_DISCPMQ);
+ }
}
static void b43_rate_memory_write(struct b43_wldev *dev, u16 rate, int is_ofdm)
@@ -2873,6 +2917,27 @@ static void b43_periodic_every15sec(struct b43_wldev *dev)
atomic_set(&phy->txerr_cnt, B43_PHY_TX_BADNESS_LIMIT);
wmb();
+
+#if B43_DEBUG
+ if (b43_debug(dev, B43_DBG_VERBOSESTATS)) {
+ unsigned int i;
+
+ b43dbg(dev->wl, "Stats: %7u IRQs/sec, %7u TX/sec, %7u RX/sec\n",
+ dev->irq_count / 15,
+ dev->tx_count / 15,
+ dev->rx_count / 15);
+ dev->irq_count = 0;
+ dev->tx_count = 0;
+ dev->rx_count = 0;
+ for (i = 0; i < ARRAY_SIZE(dev->irq_bit_count); i++) {
+ if (dev->irq_bit_count[i]) {
+ b43dbg(dev->wl, "Stats: %7u IRQ-%02u/sec (0x%08X)\n",
+ dev->irq_bit_count[i] / 15, i, (1 << i));
+ dev->irq_bit_count[i] = 0;
+ }
+ }
+ }
+#endif
}
static void do_periodic_work(struct b43_wldev *dev)
@@ -3002,14 +3067,18 @@ static void b43_security_init(struct b43_wldev *dev)
static int b43_rng_read(struct hwrng *rng, u32 *data)
{
struct b43_wl *wl = (struct b43_wl *)rng->priv;
+ struct b43_wldev *dev;
+ int count = -ENODEV;
- /* FIXME: We need to take wl->mutex here to make sure the device
- * is not going away from under our ass. However it could deadlock
- * with hwrng internal locking. */
-
- *data = b43_read16(wl->current_dev, B43_MMIO_RNG);
+ mutex_lock(&wl->mutex);
+ dev = wl->current_dev;
+ if (likely(dev && b43_status(dev) >= B43_STAT_INITIALIZED)) {
+ *data = b43_read16(dev, B43_MMIO_RNG);
+ count = sizeof(u16);
+ }
+ mutex_unlock(&wl->mutex);
- return (sizeof(u16));
+ return count;
}
#endif /* CONFIG_B43_HWRNG */
@@ -3068,6 +3137,9 @@ static void b43_tx_work(struct work_struct *work)
dev_kfree_skb(skb); /* Drop it */
}
+#if B43_DEBUG
+ dev->tx_count++;
+#endif
mutex_unlock(&wl->mutex);
}
@@ -3820,7 +3892,7 @@ redo:
/* Disable interrupts on the device. */
b43_set_status(dev, B43_STAT_INITIALIZED);
- if (0 /*FIXME dev->dev->bus->bustype == SSB_BUSTYPE_SDIO*/) {
+ if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
/* wl->mutex is locked. That is enough. */
b43_write32(dev, B43_MMIO_GEN_IRQ_MASK, 0);
b43_read32(dev, B43_MMIO_GEN_IRQ_MASK); /* Flush */
@@ -3830,10 +3902,15 @@ redo:
b43_read32(dev, B43_MMIO_GEN_IRQ_MASK); /* Flush */
spin_unlock_irq(&wl->hardirq_lock);
}
- /* Synchronize the interrupt handlers. Unlock to avoid deadlocks. */
+ /* Synchronize and free the interrupt handlers. Unlock to avoid deadlocks. */
orig_dev = dev;
mutex_unlock(&wl->mutex);
- synchronize_irq(dev->dev->irq);
+ if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
+ b43_sdio_free_irq(dev);
+ } else {
+ synchronize_irq(dev->dev->irq);
+ free_irq(dev->dev->irq, dev);
+ }
mutex_lock(&wl->mutex);
dev = wl->current_dev;
if (!dev)
@@ -3850,7 +3927,7 @@ redo:
dev_kfree_skb(skb_dequeue(&wl->tx_queue));
b43_mac_suspend(dev);
- free_irq(dev->dev->irq, dev);
+ b43_leds_exit(dev);
b43dbg(wl, "Wireless interface stopped\n");
return dev;
@@ -3864,12 +3941,20 @@ static int b43_wireless_core_start(struct b43_wldev *dev)
B43_WARN_ON(b43_status(dev) != B43_STAT_INITIALIZED);
drain_txstatus_queue(dev);
- err = request_threaded_irq(dev->dev->irq, b43_interrupt_handler,
- b43_interrupt_thread_handler,
- IRQF_SHARED, KBUILD_MODNAME, dev);
- if (err) {
- b43err(dev->wl, "Cannot request IRQ-%d\n", dev->dev->irq);
- goto out;
+ if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
+ err = b43_sdio_request_irq(dev, b43_sdio_interrupt_handler);
+ if (err) {
+ b43err(dev->wl, "Cannot request SDIO IRQ\n");
+ goto out;
+ }
+ } else {
+ err = request_threaded_irq(dev->dev->irq, b43_interrupt_handler,
+ b43_interrupt_thread_handler,
+ IRQF_SHARED, KBUILD_MODNAME, dev);
+ if (err) {
+ b43err(dev->wl, "Cannot request IRQ-%d\n", dev->dev->irq);
+ goto out;
+ }
}
/* We are ready to run. */
@@ -3882,8 +3967,10 @@ static int b43_wireless_core_start(struct b43_wldev *dev)
/* Start maintainance work */
b43_periodic_tasks_setup(dev);
+ b43_leds_init(dev);
+
b43dbg(dev->wl, "Wireless interface started\n");
- out:
+out:
return err;
}
@@ -4160,10 +4247,6 @@ static void b43_wireless_core_exit(struct b43_wldev *dev)
macctl |= B43_MACCTL_PSM_JMP0;
b43_write32(dev, B43_MMIO_MACCTL, macctl);
- if (!dev->suspend_in_progress) {
- b43_leds_exit(dev);
- b43_rng_exit(dev->wl);
- }
b43_dma_free(dev);
b43_pio_free(dev);
b43_chip_exit(dev);
@@ -4180,7 +4263,6 @@ static void b43_wireless_core_exit(struct b43_wldev *dev)
/* Initialize a wireless core */
static int b43_wireless_core_init(struct b43_wldev *dev)
{
- struct b43_wl *wl = dev->wl;
struct ssb_bus *bus = dev->dev->bus;
struct ssb_sprom *sprom = &bus->sprom;
struct b43_phy *phy = &dev->phy;
@@ -4264,7 +4346,9 @@ static int b43_wireless_core_init(struct b43_wldev *dev)
/* Maximum Contention Window */
b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_MAXCONT, 0x3FF);
- if ((dev->dev->bus->bustype == SSB_BUSTYPE_PCMCIA) || B43_FORCE_PIO) {
+ if ((dev->dev->bus->bustype == SSB_BUSTYPE_PCMCIA) ||
+ (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) ||
+ B43_FORCE_PIO) {
dev->__using_pio_transfers = 1;
err = b43_pio_init(dev);
} else {
@@ -4280,15 +4364,13 @@ static int b43_wireless_core_init(struct b43_wldev *dev)
ssb_bus_powerup(bus, !(sprom->boardflags_lo & B43_BFL_XTAL_NOSLOW));
b43_upload_card_macaddress(dev);
b43_security_init(dev);
- if (!dev->suspend_in_progress)
- b43_rng_init(wl);
+
+ ieee80211_wake_queues(dev->wl->hw);
ieee80211_wake_queues(dev->wl->hw);
b43_set_status(dev, B43_STAT_INITIALIZED);
- if (!dev->suspend_in_progress)
- b43_leds_init(dev);
out:
return err;
@@ -4837,7 +4919,6 @@ static int b43_wireless_init(struct ssb_device *dev)
/* Initialize struct b43_wl */
wl->hw = hw;
- spin_lock_init(&wl->leds_lock);
mutex_init(&wl->mutex);
spin_lock_init(&wl->hardirq_lock);
INIT_LIST_HEAD(&wl->devlist);
@@ -4878,6 +4959,8 @@ static int b43_probe(struct ssb_device *dev, const struct ssb_device_id *id)
err = ieee80211_register_hw(wl->hw);
if (err)
goto err_one_core_detach;
+ b43_leds_register(wl->current_dev);
+ b43_rng_init(wl);
}
out:
@@ -4906,12 +4989,15 @@ static void b43_remove(struct ssb_device *dev)
* might have modified it. Restoring is important, so the networking
* stack can properly free resources. */
wl->hw->queues = wl->mac80211_initially_registered_queues;
+ b43_leds_stop(wldev);
ieee80211_unregister_hw(wl->hw);
}
b43_one_core_detach(dev);
if (list_empty(&wl->devlist)) {
+ b43_rng_exit(wl);
+ b43_leds_unregister(wldev);
/* Last core on the chip unregistered.
* We can destroy common struct b43_wl.
*/
@@ -4929,80 +5015,17 @@ void b43_controller_restart(struct b43_wldev *dev, const char *reason)
ieee80211_queue_work(dev->wl->hw, &dev->restart_work);
}
-#ifdef CONFIG_PM
-
-static int b43_suspend(struct ssb_device *dev, pm_message_t state)
-{
- struct b43_wldev *wldev = ssb_get_drvdata(dev);
- struct b43_wl *wl = wldev->wl;
-
- b43dbg(wl, "Suspending...\n");
-
- mutex_lock(&wl->mutex);
- wldev->suspend_in_progress = true;
- wldev->suspend_init_status = b43_status(wldev);
- if (wldev->suspend_init_status >= B43_STAT_STARTED)
- wldev = b43_wireless_core_stop(wldev);
- if (wldev && wldev->suspend_init_status >= B43_STAT_INITIALIZED)
- b43_wireless_core_exit(wldev);
- mutex_unlock(&wl->mutex);
-
- b43dbg(wl, "Device suspended.\n");
-
- return 0;
-}
-
-static int b43_resume(struct ssb_device *dev)
-{
- struct b43_wldev *wldev = ssb_get_drvdata(dev);
- struct b43_wl *wl = wldev->wl;
- int err = 0;
-
- b43dbg(wl, "Resuming...\n");
-
- mutex_lock(&wl->mutex);
- if (wldev->suspend_init_status >= B43_STAT_INITIALIZED) {
- err = b43_wireless_core_init(wldev);
- if (err) {
- b43err(wl, "Resume failed at core init\n");
- goto out;
- }
- }
- if (wldev->suspend_init_status >= B43_STAT_STARTED) {
- err = b43_wireless_core_start(wldev);
- if (err) {
- b43_leds_exit(wldev);
- b43_rng_exit(wldev->wl);
- b43_wireless_core_exit(wldev);
- b43err(wl, "Resume failed at core start\n");
- goto out;
- }
- }
- b43dbg(wl, "Device resumed.\n");
- out:
- wldev->suspend_in_progress = false;
- mutex_unlock(&wl->mutex);
- return err;
-}
-
-#else /* CONFIG_PM */
-# define b43_suspend NULL
-# define b43_resume NULL
-#endif /* CONFIG_PM */
-
static struct ssb_driver b43_ssb_driver = {
.name = KBUILD_MODNAME,
.id_table = b43_ssb_tbl,
.probe = b43_probe,
.remove = b43_remove,
- .suspend = b43_suspend,
- .resume = b43_resume,
};
static void b43_print_driverinfo(void)
{
const char *feat_pci = "", *feat_pcmcia = "", *feat_nphy = "",
- *feat_leds = "";
+ *feat_leds = "", *feat_sdio = "";
#ifdef CONFIG_B43_PCI_AUTOSELECT
feat_pci = "P";
@@ -5016,11 +5039,14 @@ static void b43_print_driverinfo(void)
#ifdef CONFIG_B43_LEDS
feat_leds = "L";
#endif
+#ifdef CONFIG_B43_SDIO
+ feat_sdio = "S";
+#endif
printk(KERN_INFO "Broadcom 43xx driver loaded "
- "[ Features: %s%s%s%s, Firmware-ID: "
+ "[ Features: %s%s%s%s%s, Firmware-ID: "
B43_SUPPORTED_FIRMWARE_ID " ]\n",
feat_pci, feat_pcmcia, feat_nphy,
- feat_leds);
+ feat_leds, feat_sdio);
}
static int __init b43_init(void)
@@ -5031,13 +5057,18 @@ static int __init b43_init(void)
err = b43_pcmcia_init();
if (err)
goto err_dfs_exit;
- err = ssb_driver_register(&b43_ssb_driver);
+ err = b43_sdio_init();
if (err)
goto err_pcmcia_exit;
+ err = ssb_driver_register(&b43_ssb_driver);
+ if (err)
+ goto err_sdio_exit;
b43_print_driverinfo();
return err;
+err_sdio_exit:
+ b43_sdio_exit();
err_pcmcia_exit:
b43_pcmcia_exit();
err_dfs_exit:
@@ -5048,6 +5079,7 @@ err_dfs_exit:
static void __exit b43_exit(void)
{
ssb_driver_unregister(&b43_ssb_driver);
+ b43_sdio_exit();
b43_pcmcia_exit();
b43_debugfs_exit();
}
diff --git a/drivers/net/wireless/b43/phy_lp.c b/drivers/net/wireless/b43/phy_lp.c
index 3e02d969f68..1e318d815a5 100644
--- a/drivers/net/wireless/b43/phy_lp.c
+++ b/drivers/net/wireless/b43/phy_lp.c
@@ -2228,6 +2228,16 @@ static enum b43_txpwr_result b43_lpphy_op_recalc_txpower(struct b43_wldev *dev,
return B43_TXPWR_RES_DONE;
}
+/* LP-PHY implementation of the switch_analog PHY operation.
+ * Switching off sets the low three bits of both AFE_CTL_OVRVAL and
+ * AFE_CTL_OVR; switching on clears those bits in AFE_CTL_OVR only. */
+void b43_lpphy_op_switch_analog(struct b43_wldev *dev, bool on)
+{
+	if (!on) {
+		b43_phy_set(dev, B43_LPPHY_AFE_CTL_OVRVAL, 0x0007);
+		b43_phy_set(dev, B43_LPPHY_AFE_CTL_OVR, 0x0007);
+		return;
+	}
+
+	b43_phy_mask(dev, B43_LPPHY_AFE_CTL_OVR, 0xfff8);
+}
+
const struct b43_phy_operations b43_phyops_lp = {
.allocate = b43_lpphy_op_allocate,
.free = b43_lpphy_op_free,
@@ -2239,7 +2249,7 @@ const struct b43_phy_operations b43_phyops_lp = {
.radio_read = b43_lpphy_op_radio_read,
.radio_write = b43_lpphy_op_radio_write,
.software_rfkill = b43_lpphy_op_software_rfkill,
- .switch_analog = b43_phyop_switch_analog_generic,
+ .switch_analog = b43_lpphy_op_switch_analog,
.switch_channel = b43_lpphy_op_switch_channel,
.get_default_chan = b43_lpphy_op_get_default_chan,
.set_rx_antenna = b43_lpphy_op_set_rx_antenna,
diff --git a/drivers/net/wireless/b43/pio.c b/drivers/net/wireless/b43/pio.c
index 3498b68385e..e96091b3149 100644
--- a/drivers/net/wireless/b43/pio.c
+++ b/drivers/net/wireless/b43/pio.c
@@ -574,7 +574,7 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev,
q->buffer_used -= total_len;
q->free_packet_slots += 1;
- ieee80211_tx_status_irqsafe(dev->wl->hw, pack->skb);
+ ieee80211_tx_status(dev->wl->hw, pack->skb);
pack->skb = NULL;
list_add(&pack->list, &q->packets_list);
diff --git a/drivers/net/wireless/b43/rfkill.c b/drivers/net/wireless/b43/rfkill.c
index 31e55999893..7a3218c5ba7 100644
--- a/drivers/net/wireless/b43/rfkill.c
+++ b/drivers/net/wireless/b43/rfkill.c
@@ -28,7 +28,7 @@
/* Returns TRUE, if the radio is enabled in hardware. */
bool b43_is_hw_radio_enabled(struct b43_wldev *dev)
{
- if (dev->phy.rev >= 3) {
+ if (dev->phy.rev >= 3 || dev->phy.type == B43_PHYTYPE_LP) {
if (!(b43_read32(dev, B43_MMIO_RADIO_HWENABLED_HI)
& B43_MMIO_RADIO_HWENABLED_HI_MASK))
return 1;
diff --git a/drivers/net/wireless/b43/sdio.c b/drivers/net/wireless/b43/sdio.c
new file mode 100644
index 00000000000..0d3ac64147a
--- /dev/null
+++ b/drivers/net/wireless/b43/sdio.c
@@ -0,0 +1,202 @@
+/*
+ * Broadcom B43 wireless driver
+ *
+ * SDIO over Sonics Silicon Backplane bus glue for b43.
+ *
+ * Copyright (C) 2009 Albert Herranz
+ * Copyright (C) 2009 Michael Buesch <mb@bu3sch.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/sdio_func.h>
+#include <linux/mmc/sdio_ids.h>
+#include <linux/ssb/ssb.h>
+
+#include "sdio.h"
+#include "b43.h"
+
+
+#define HNBU_CHIPID 0x01 /* vendor & device id */
+
+#define B43_SDIO_BLOCK_SIZE 64 /* rx fifo max size in bytes */
+
+
+/* Quirk flags per chip, keyed by the vendor/device chip ID read from the
+ * card tuples. The table ends with an all-zero entry (quirks == 0). */
+static const struct b43_sdio_quirk {
+ u16 vendor;
+ u16 device;
+ unsigned int quirks;
+} b43_sdio_quirks[] = {
+ { 0x14E4, 0x4318, SSB_QUIRK_SDIO_READ_AFTER_WRITE32, },
+ { },
+};
+
+
+/* Return the quirk flags registered in b43_sdio_quirks for the given
+ * chip ID, or 0 when the chip has no entry. */
+static unsigned int b43_sdio_get_quirks(u16 vendor, u16 device)
+{
+	const struct b43_sdio_quirk *entry = b43_sdio_quirks;
+
+	/* The terminating entry has quirks == 0. */
+	while (entry->quirks) {
+		if (entry->vendor == vendor && entry->device == device)
+			return entry->quirks;
+		entry++;
+	}
+
+	return 0;
+}
+
+/* IRQ callback registered through sdio_claim_irq(). Forwards the
+ * interrupt to the handler installed by b43_sdio_request_irq(), unless
+ * the device has not reached B43_STAT_STARTED. */
+static void b43_sdio_interrupt_dispatcher(struct sdio_func *func)
+{
+	struct b43_sdio *sdio = sdio_get_drvdata(func);
+	struct b43_wldev *dev = sdio->irq_handler_opaque;
+
+	if (likely(b43_status(dev) >= B43_STAT_STARTED)) {
+		/* The host is released for the duration of the handler and
+		 * re-claimed afterwards (presumably the SDIO core calls us
+		 * with the host claimed -- confirm against the MMC core). */
+		sdio_release_host(func);
+		sdio->irq_handler(dev);
+		sdio_claim_host(func);
+	}
+}
+
+/* Install @handler as the device interrupt handler and claim the SDIO
+ * IRQ of the function backing @dev's bus.
+ * Returns the result of sdio_claim_irq(). */
+int b43_sdio_request_irq(struct b43_wldev *dev,
+			 void (*handler)(struct b43_wldev *dev))
+{
+	struct sdio_func *func = dev->dev->bus->host_sdio;
+	struct b43_sdio *sdio = sdio_get_drvdata(func);
+	int err;
+
+	/* Store the handler and its argument before the IRQ is claimed,
+	 * so the dispatcher finds them as soon as it can be called. */
+	sdio->irq_handler_opaque = dev;
+	sdio->irq_handler = handler;
+
+	sdio_claim_host(func);
+	err = sdio_claim_irq(func, b43_sdio_interrupt_dispatcher);
+	sdio_release_host(func);
+
+	return err;
+}
+
+/* Release the SDIO IRQ claimed by b43_sdio_request_irq() and forget
+ * the installed handler. */
+void b43_sdio_free_irq(struct b43_wldev *dev)
+{
+	struct sdio_func *func = dev->dev->bus->host_sdio;
+	struct b43_sdio *sdio = sdio_get_drvdata(func);
+
+	sdio_claim_host(func);
+	sdio_release_irq(func);
+	sdio_release_host(func);
+
+	/* The IRQ is released; clear the handler and its argument. */
+	sdio->irq_handler = NULL;
+	sdio->irq_handler_opaque = NULL;
+}
+
+/* Probe callback of the b43 SDIO driver.
+ *
+ * Reads the chip ID (vendor/device) from the function's vendor tuples,
+ * sets the block size, enables the function and registers an SSB bus on
+ * top of it. On success the allocated struct b43_sdio is stored as the
+ * function's drvdata.
+ *
+ * Returns 0 on success, -ENODEV if no chip ID tuple was found, -ENOMEM
+ * on allocation failure, or the error of the failing SDIO/SSB call.
+ */
+static int b43_sdio_probe(struct sdio_func *func,
+			  const struct sdio_device_id *id)
+{
+	struct b43_sdio *sdio;
+	struct sdio_func_tuple *tuple;
+	u16 vendor = 0, device = 0;
+	int error;
+
+	/* Look for the card chip identifier. */
+	for (tuple = func->tuples; tuple; tuple = tuple->next) {
+		/* Check the size before touching data[], so a short tuple
+		 * is never read past its end. */
+		if (tuple->code != 0x80 || tuple->size != 5)
+			continue;
+		if (tuple->data[0] != HNBU_CHIPID)
+			continue;
+		vendor = tuple->data[1] | (tuple->data[2]<<8);
+		device = tuple->data[3] | (tuple->data[4]<<8);
+		dev_info(&func->dev, "Chip ID %04x:%04x\n",
+			 vendor, device);
+	}
+	if (!vendor || !device) {
+		error = -ENODEV;
+		goto out;
+	}
+
+	sdio_claim_host(func);
+	error = sdio_set_block_size(func, B43_SDIO_BLOCK_SIZE);
+	if (error) {
+		dev_err(&func->dev, "failed to set block size to %u bytes,"
+			" error %d\n", B43_SDIO_BLOCK_SIZE, error);
+		goto err_release_host;
+	}
+	error = sdio_enable_func(func);
+	if (error) {
+		dev_err(&func->dev, "failed to enable func, error %d\n", error);
+		goto err_release_host;
+	}
+	sdio_release_host(func);
+
+	sdio = kzalloc(sizeof(*sdio), GFP_KERNEL);
+	if (!sdio) {
+		error = -ENOMEM;
+		dev_err(&func->dev, "failed to allocate ssb bus\n");
+		goto err_disable_func;
+	}
+	error = ssb_bus_sdiobus_register(&sdio->ssb, func,
+					 b43_sdio_get_quirks(vendor, device));
+	if (error) {
+		dev_err(&func->dev, "failed to register ssb sdio bus,"
+			" error %d\n", error);
+		goto err_free_ssb;
+	}
+	sdio_set_drvdata(func, sdio);
+
+	return 0;
+
+err_free_ssb:
+	kfree(sdio);
+err_disable_func:
+	/* The host was released after the function was enabled. Claim it
+	 * again so the function is disabled with the host held and the
+	 * release below stays balanced. */
+	sdio_claim_host(func);
+	sdio_disable_func(func);
+err_release_host:
+	sdio_release_host(func);
+out:
+	return error;
+}
+
+/* Remove callback of the b43 SDIO driver.
+ * Unregisters the SSB bus, disables the SDIO function and frees the
+ * per-function state allocated in b43_sdio_probe(). */
+static void b43_sdio_remove(struct sdio_func *func)
+{
+	struct b43_sdio *sdio = sdio_get_drvdata(func);
+
+	ssb_bus_unregister(&sdio->ssb);
+	/* Disabling the function accesses the card, so hold the host,
+	 * as the probe path does around sdio_enable_func(). */
+	sdio_claim_host(func);
+	sdio_disable_func(func);
+	sdio_release_host(func);
+	kfree(sdio);
+	sdio_set_drvdata(func, NULL);
+}
+
+/* SDIO device IDs handled by this driver; used as the driver's id_table.
+ * The empty entry terminates the list. */
+static const struct sdio_device_id b43_sdio_ids[] = {
+ { SDIO_DEVICE(0x02d0, 0x044b) }, /* Nintendo Wii WLAN daughter card */
+ { },
+};
+
+/* SDIO driver description registered by b43_sdio_init(). */
+static struct sdio_driver b43_sdio_driver = {
+ .name = "b43-sdio",
+ .id_table = b43_sdio_ids,
+ .probe = b43_sdio_probe,
+ .remove = b43_sdio_remove,
+};
+
+/* Register the b43 SDIO driver. Returns the result of
+ * sdio_register_driver(). */
+int b43_sdio_init(void)
+{
+ return sdio_register_driver(&b43_sdio_driver);
+}
+
+/* Unregister the b43 SDIO driver registered by b43_sdio_init(). */
+void b43_sdio_exit(void)
+{
+ sdio_unregister_driver(&b43_sdio_driver);
+}
diff --git a/drivers/net/wireless/b43/sdio.h b/drivers/net/wireless/b43/sdio.h
new file mode 100644
index 00000000000..fb633094403
--- /dev/null
+++ b/drivers/net/wireless/b43/sdio.h
@@ -0,0 +1,45 @@
+#ifndef B43_SDIO_H_
+#define B43_SDIO_H_
+
+#include <linux/ssb/ssb.h>
+
+struct b43_wldev;
+
+
+#ifdef CONFIG_B43_SDIO
+
+/* Per-SDIO-function state of the b43 SDIO glue. */
+struct b43_sdio {
+ /* The SSB bus registered on top of the SDIO function. */
+ struct ssb_bus ssb;
+ /* Argument handed to irq_handler. */
+ void *irq_handler_opaque;
+ /* Device interrupt handler; see b43_sdio_request_irq(). */
+ void (*irq_handler)(struct b43_wldev *dev);
+};
+
+int b43_sdio_request_irq(struct b43_wldev *dev,
+ void (*handler)(struct b43_wldev *dev));
+void b43_sdio_free_irq(struct b43_wldev *dev);
+
+int b43_sdio_init(void);
+void b43_sdio_exit(void);
+
+
+#else /* CONFIG_B43_SDIO */
+
+
+/* Stub used when SDIO support is disabled: there is no SDIO IRQ to
+ * request, so always fail with -ENODEV.
+ * Must be static inline: a plain definition in a header is emitted in
+ * every file that includes it and breaks the link. */
+static inline int b43_sdio_request_irq(struct b43_wldev *dev,
+			 void (*handler)(struct b43_wldev *dev))
+{
+	return -ENODEV;
+}
+/* Stub used when SDIO support is disabled: nothing to free.
+ * Must be static inline: a plain definition in a header is emitted in
+ * every file that includes it and breaks the link. */
+static inline void b43_sdio_free_irq(struct b43_wldev *dev)
+{
+}
+/* Stub used when SDIO support is disabled: nothing to register, report success. */
+static inline int b43_sdio_init(void)
+{
+ return 0;
+}
+/* Stub used when SDIO support is disabled: nothing to unregister. */
+static inline void b43_sdio_exit(void)
+{
+}
+
+#endif /* CONFIG_B43_SDIO */
+#endif /* B43_SDIO_H_ */
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index 14f541248b5..ac9f600995e 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -690,8 +690,11 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr)
}
memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status));
- ieee80211_rx_irqsafe(dev->wl->hw, skb);
+ ieee80211_rx(dev->wl->hw, skb);
+#if B43_DEBUG
+ dev->rx_count++;
+#endif
return;
drop:
b43dbg(dev->wl, "RX: Packet dropped\n");
diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c
index ca61d3796ce..3259b884154 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
@@ -2021,6 +2021,12 @@ static int iwl4965_tx_status_reply_tx(struct iwl_priv *priv,
agg->frame_count, txq_id, idx);
hdr = iwl_tx_queue_get_hdr(priv, txq_id, idx);
+ if (!hdr) {
+ IWL_ERR(priv,
+ "BUG_ON idx doesn't point to valid skb"
+ " idx=%d, txq_id=%d\n", idx, txq_id);
+ return -1;
+ }
sc = le16_to_cpu(hdr->seq_ctrl);
if (idx != (SEQ_TO_SN(sc) & 0xff)) {
diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index 1d539e3b8db..a6391c7fea5 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -1163,6 +1163,12 @@ static int iwl5000_tx_status_reply_tx(struct iwl_priv *priv,
agg->frame_count, txq_id, idx);
hdr = iwl_tx_queue_get_hdr(priv, txq_id, idx);
+ if (!hdr) {
+ IWL_ERR(priv,
+ "BUG_ON idx doesn't point to valid skb"
+ " idx=%d, txq_id=%d\n", idx, txq_id);
+ return -1;
+ }
sc = le16_to_cpu(hdr->seq_ctrl);
if (idx != (SEQ_TO_SN(sc) & 0xff)) {
diff --git a/drivers/net/wireless/iwlwifi/iwl-rx.c b/drivers/net/wireless/iwlwifi/iwl-rx.c
index b90adcb73b0..8e1bb53c0aa 100644
--- a/drivers/net/wireless/iwlwifi/iwl-rx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-rx.c
@@ -250,12 +250,20 @@ void iwl_rx_allocate(struct iwl_priv *priv, gfp_t priority)
}
spin_unlock_irqrestore(&rxq->lock, flags);
+ if (rxq->free_count > RX_LOW_WATERMARK)
+ priority |= __GFP_NOWARN;
/* Alloc a new receive buffer */
skb = alloc_skb(priv->hw_params.rx_buf_size + 256,
priority);
if (!skb) {
- IWL_CRIT(priv, "Can not allocate SKB buffers\n");
+ if (net_ratelimit())
+ IWL_DEBUG_INFO(priv, "Failed to allocate SKB buffer.\n");
+ if ((rxq->free_count <= RX_LOW_WATERMARK) &&
+ net_ratelimit())
+ IWL_CRIT(priv, "Failed to allocate SKB buffer with %s. Only %u free buffers remaining.\n",
+ priority == GFP_ATOMIC ? "GFP_ATOMIC" : "GFP_KERNEL",
+ rxq->free_count);
/* We don't reschedule replenish work here -- we will
* call the restock method and if it still needs
* more buffers it will schedule replenish */
diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c
index a2b9ec82b96..c6633fec821 100644
--- a/drivers/net/wireless/iwlwifi/iwl-sta.c
+++ b/drivers/net/wireless/iwlwifi/iwl-sta.c
@@ -520,7 +520,7 @@ int iwl_send_static_wepkey_cmd(struct iwl_priv *priv, u8 send_if_empty)
struct iwl_host_cmd cmd = {
.id = REPLY_WEPKEY,
.data = wep_cmd,
- .flags = CMD_SYNC,
+ .flags = CMD_ASYNC,
};
memset(wep_cmd, 0, cmd_size +
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 090966837f3..4f2d4393728 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -1146,11 +1146,18 @@ static void iwl3945_rx_allocate(struct iwl_priv *priv, gfp_t priority)
}
spin_unlock_irqrestore(&rxq->lock, flags);
+ if (rxq->free_count > RX_LOW_WATERMARK)
+ priority |= __GFP_NOWARN;
/* Alloc a new receive buffer */
skb = alloc_skb(priv->hw_params.rx_buf_size, priority);
if (!skb) {
if (net_ratelimit())
- IWL_CRIT(priv, ": Can not allocate SKB buffers\n");
+ IWL_DEBUG_INFO(priv, "Failed to allocate SKB buffer.\n");
+ if ((rxq->free_count <= RX_LOW_WATERMARK) &&
+ net_ratelimit())
+ IWL_CRIT(priv, "Failed to allocate SKB buffer with %s. Only %u free buffers remaining.\n",
+ priority == GFP_ATOMIC ? "GFP_ATOMIC" : "GFP_KERNEL",
+ rxq->free_count);
/* We don't reschedule replenish work here -- we will
* call the restock method and if it still needs
* more buffers it will schedule replenish */
diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h
index 5462cb5ad99..567f029a8cd 100644
--- a/drivers/net/wireless/rt2x00/rt2x00lib.h
+++ b/drivers/net/wireless/rt2x00/rt2x00lib.h
@@ -380,7 +380,7 @@ static inline void rt2x00crypto_tx_insert_iv(struct sk_buff *skb,
{
}
-static inline void rt2x00crypto_rx_insert_iv(struct sk_buff *skb, bool l2pad,
+static inline void rt2x00crypto_rx_insert_iv(struct sk_buff *skb,
unsigned int header_length,
struct rxdone_entry_desc *rxdesc)
{
diff --git a/drivers/net/wireless/wl12xx/Kconfig b/drivers/net/wireless/wl12xx/Kconfig
index 7b14d5bc63d..88060e11754 100644
--- a/drivers/net/wireless/wl12xx/Kconfig
+++ b/drivers/net/wireless/wl12xx/Kconfig
@@ -1,5 +1,5 @@
menuconfig WL12XX
- boolean "TI wl12xx driver support"
+ tristate "TI wl12xx driver support"
depends on MAC80211 && WLAN_80211 && EXPERIMENTAL
---help---
This will enable TI wl12xx driver support. The drivers make
diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c
index 38688847d56..23a6a6d4863 100644
--- a/drivers/net/wireless/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zd1211rw/zd_usb.c
@@ -1070,7 +1070,7 @@ static int eject_installer(struct usb_interface *intf)
/* Find bulk out endpoint */
endpoint = &iface_desc->endpoint[1].desc;
- if ((endpoint->bEndpointAddress & USB_TYPE_MASK) == USB_DIR_OUT &&
+ if (usb_endpoint_dir_out(endpoint) &&
usb_endpoint_xfer_bulk(endpoint)) {
bulk_out_ep = endpoint->bEndpointAddress;
} else {
diff --git a/drivers/net/xilinx_emaclite.c b/drivers/net/xilinx_emaclite.c
index dc22782633a..83a044dbd1d 100644
--- a/drivers/net/xilinx_emaclite.c
+++ b/drivers/net/xilinx_emaclite.c
@@ -134,18 +134,15 @@ static void xemaclite_enable_interrupts(struct net_local *drvdata)
}
/* Enable the Rx interrupts for the first buffer */
- reg_data = in_be32(drvdata->base_addr + XEL_RSR_OFFSET);
out_be32(drvdata->base_addr + XEL_RSR_OFFSET,
- reg_data | XEL_RSR_RECV_IE_MASK);
+ XEL_RSR_RECV_IE_MASK);
/* Enable the Rx interrupts for the second Buffer if
* configured in HW */
if (drvdata->rx_ping_pong != 0) {
- reg_data = in_be32(drvdata->base_addr + XEL_BUFFER_OFFSET +
- XEL_RSR_OFFSET);
out_be32(drvdata->base_addr + XEL_BUFFER_OFFSET +
XEL_RSR_OFFSET,
- reg_data | XEL_RSR_RECV_IE_MASK);
+ XEL_RSR_RECV_IE_MASK);
}
/* Enable the Global Interrupt Enable */
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 8574622e36a..c9e2ae90f19 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -154,9 +154,8 @@ int sync_start(void)
{
int err;
- if (!alloc_cpumask_var(&marked_cpus, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
return -ENOMEM;
- cpumask_clear(marked_cpus);
start_cpu_work();
diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c
index 554e11f9e1c..8eefe56f1cb 100644
--- a/drivers/parport/procfs.c
+++ b/drivers/parport/procfs.c
@@ -31,7 +31,7 @@
#define PARPORT_MIN_SPINTIME_VALUE 1
#define PARPORT_MAX_SPINTIME_VALUE 1000
-static int do_active_device(ctl_table *table, int write, struct file *filp,
+static int do_active_device(ctl_table *table, int write,
void __user *result, size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
@@ -68,7 +68,7 @@ static int do_active_device(ctl_table *table, int write, struct file *filp,
}
#ifdef CONFIG_PARPORT_1284
-static int do_autoprobe(ctl_table *table, int write, struct file *filp,
+static int do_autoprobe(ctl_table *table, int write,
void __user *result, size_t *lenp, loff_t *ppos)
{
struct parport_device_info *info = table->extra2;
@@ -111,7 +111,7 @@ static int do_autoprobe(ctl_table *table, int write, struct file *filp,
#endif /* IEEE1284.3 support. */
static int do_hardware_base_addr (ctl_table *table, int write,
- struct file *filp, void __user *result,
+ void __user *result,
size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
@@ -139,7 +139,7 @@ static int do_hardware_base_addr (ctl_table *table, int write,
}
static int do_hardware_irq (ctl_table *table, int write,
- struct file *filp, void __user *result,
+ void __user *result,
size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
@@ -167,7 +167,7 @@ static int do_hardware_irq (ctl_table *table, int write,
}
static int do_hardware_dma (ctl_table *table, int write,
- struct file *filp, void __user *result,
+ void __user *result,
size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
@@ -195,7 +195,7 @@ static int do_hardware_dma (ctl_table *table, int write,
}
static int do_hardware_modes (ctl_table *table, int write,
- struct file *filp, void __user *result,
+ void __user *result,
size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 36faa9a8e18..3070f77eb56 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -72,15 +72,9 @@ do { \
#define SLOT_NAME_SIZE 10
struct slot {
- u8 bus;
- u8 device;
u8 state;
- u8 hp_slot;
- u32 number;
struct controller *ctrl;
- struct hpc_ops *hpc_ops;
struct hotplug_slot *hotplug_slot;
- struct list_head slot_list;
struct delayed_work work; /* work for button event */
struct mutex lock;
};
@@ -92,18 +86,10 @@ struct event_info {
};
struct controller {
- struct mutex crit_sect; /* critical section mutex */
struct mutex ctrl_lock; /* controller lock */
- int num_slots; /* Number of slots on ctlr */
- int slot_num_inc; /* 1 or -1 */
- struct pci_dev *pci_dev;
struct pcie_device *pcie; /* PCI Express port service */
- struct list_head slot_list;
- struct hpc_ops *hpc_ops;
+ struct slot *slot;
wait_queue_head_t queue; /* sleep & wake process */
- u8 slot_device_offset;
- u32 first_slot; /* First physical slot number */ /* PCIE only has 1 slot */
- u8 slot_bus; /* Bus where the slots handled by this controller sit */
u32 slot_cap;
u8 cap_base;
struct timer_list poll_timer;
@@ -131,40 +117,20 @@ struct controller {
#define POWERON_STATE 3
#define POWEROFF_STATE 4
-/* Error messages */
-#define INTERLOCK_OPEN 0x00000002
-#define ADD_NOT_SUPPORTED 0x00000003
-#define CARD_FUNCTIONING 0x00000005
-#define ADAPTER_NOT_SAME 0x00000006
-#define NO_ADAPTER_PRESENT 0x00000009
-#define NOT_ENOUGH_RESOURCES 0x0000000B
-#define DEVICE_TYPE_NOT_SUPPORTED 0x0000000C
-#define WRONG_BUS_FREQUENCY 0x0000000D
-#define POWER_FAILURE 0x0000000E
-
-/* Field definitions in Slot Capabilities Register */
-#define ATTN_BUTTN_PRSN 0x00000001
-#define PWR_CTRL_PRSN 0x00000002
-#define MRL_SENS_PRSN 0x00000004
-#define ATTN_LED_PRSN 0x00000008
-#define PWR_LED_PRSN 0x00000010
-#define HP_SUPR_RM_SUP 0x00000020
-#define EMI_PRSN 0x00020000
-#define NO_CMD_CMPL_SUP 0x00040000
-
-#define ATTN_BUTTN(ctrl) ((ctrl)->slot_cap & ATTN_BUTTN_PRSN)
-#define POWER_CTRL(ctrl) ((ctrl)->slot_cap & PWR_CTRL_PRSN)
-#define MRL_SENS(ctrl) ((ctrl)->slot_cap & MRL_SENS_PRSN)
-#define ATTN_LED(ctrl) ((ctrl)->slot_cap & ATTN_LED_PRSN)
-#define PWR_LED(ctrl) ((ctrl)->slot_cap & PWR_LED_PRSN)
-#define HP_SUPR_RM(ctrl) ((ctrl)->slot_cap & HP_SUPR_RM_SUP)
-#define EMI(ctrl) ((ctrl)->slot_cap & EMI_PRSN)
-#define NO_CMD_CMPL(ctrl) ((ctrl)->slot_cap & NO_CMD_CMPL_SUP)
+#define ATTN_BUTTN(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_ABP)
+#define POWER_CTRL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PCP)
+#define MRL_SENS(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_MRLSP)
+#define ATTN_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_AIP)
+#define PWR_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PIP)
+#define HP_SUPR_RM(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_HPS)
+#define EMI(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_EIP)
+#define NO_CMD_CMPL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_NCCS)
+#define PSN(ctrl) ((ctrl)->slot_cap >> 19)
extern int pciehp_sysfs_enable_slot(struct slot *slot);
extern int pciehp_sysfs_disable_slot(struct slot *slot);
extern u8 pciehp_handle_attention_button(struct slot *p_slot);
- extern u8 pciehp_handle_switch_change(struct slot *p_slot);
+extern u8 pciehp_handle_switch_change(struct slot *p_slot);
extern u8 pciehp_handle_presence_change(struct slot *p_slot);
extern u8 pciehp_handle_power_fault(struct slot *p_slot);
extern int pciehp_configure_device(struct slot *p_slot);
@@ -175,45 +141,30 @@ int pcie_init_notification(struct controller *ctrl);
int pciehp_enable_slot(struct slot *p_slot);
int pciehp_disable_slot(struct slot *p_slot);
int pcie_enable_notification(struct controller *ctrl);
+int pciehp_power_on_slot(struct slot *slot);
+int pciehp_power_off_slot(struct slot *slot);
+int pciehp_get_power_status(struct slot *slot, u8 *status);
+int pciehp_get_attention_status(struct slot *slot, u8 *status);
+
+int pciehp_set_attention_status(struct slot *slot, u8 status);
+int pciehp_get_latch_status(struct slot *slot, u8 *status);
+int pciehp_get_adapter_status(struct slot *slot, u8 *status);
+int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *speed);
+int pciehp_get_max_link_width(struct slot *slot, enum pcie_link_width *val);
+int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *speed);
+int pciehp_get_cur_link_width(struct slot *slot, enum pcie_link_width *val);
+int pciehp_query_power_fault(struct slot *slot);
+void pciehp_green_led_on(struct slot *slot);
+void pciehp_green_led_off(struct slot *slot);
+void pciehp_green_led_blink(struct slot *slot);
+int pciehp_check_link_status(struct controller *ctrl);
+void pciehp_release_ctrl(struct controller *ctrl);
static inline const char *slot_name(struct slot *slot)
{
return hotplug_slot_name(slot->hotplug_slot);
}
-static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device)
-{
- struct slot *slot;
-
- list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
- if (slot->device == device)
- return slot;
- }
-
- ctrl_err(ctrl, "Slot (device=0x%02x) not found\n", device);
- return NULL;
-}
-
-struct hpc_ops {
- int (*power_on_slot)(struct slot *slot);
- int (*power_off_slot)(struct slot *slot);
- int (*get_power_status)(struct slot *slot, u8 *status);
- int (*get_attention_status)(struct slot *slot, u8 *status);
- int (*set_attention_status)(struct slot *slot, u8 status);
- int (*get_latch_status)(struct slot *slot, u8 *status);
- int (*get_adapter_status)(struct slot *slot, u8 *status);
- int (*get_max_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
- int (*get_cur_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
- int (*get_max_lnk_width)(struct slot *slot, enum pcie_link_width *val);
- int (*get_cur_lnk_width)(struct slot *slot, enum pcie_link_width *val);
- int (*query_power_fault)(struct slot *slot);
- void (*green_led_on)(struct slot *slot);
- void (*green_led_off)(struct slot *slot);
- void (*green_led_blink)(struct slot *slot);
- void (*release_ctlr)(struct controller *ctrl);
- int (*check_lnk_status)(struct controller *ctrl);
-};
-
#ifdef CONFIG_ACPI
#include <acpi/acpi.h>
#include <acpi/acpi_bus.h>
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index 7163e6a6cfa..37c8d3d0323 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -33,6 +33,11 @@
#define PCIEHP_DETECT_AUTO (2)
#define PCIEHP_DETECT_DEFAULT PCIEHP_DETECT_AUTO
+struct dummy_slot {
+ u32 number;
+ struct list_head list;
+};
+
static int slot_detection_mode;
static char *pciehp_detect_mode;
module_param(pciehp_detect_mode, charp, 0444);
@@ -77,7 +82,7 @@ static int __init dummy_probe(struct pcie_device *dev)
int pos;
u32 slot_cap;
acpi_handle handle;
- struct slot *slot, *tmp;
+ struct dummy_slot *slot, *tmp;
struct pci_dev *pdev = dev->port;
/* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */
if (pciehp_get_hp_hw_control_from_firmware(pdev))
@@ -89,11 +94,11 @@ static int __init dummy_probe(struct pcie_device *dev)
if (!slot)
return -ENOMEM;
slot->number = slot_cap >> 19;
- list_for_each_entry(tmp, &dummy_slots, slot_list) {
+ list_for_each_entry(tmp, &dummy_slots, list) {
if (tmp->number == slot->number)
dup_slot_id++;
}
- list_add_tail(&slot->slot_list, &dummy_slots);
+ list_add_tail(&slot->list, &dummy_slots);
handle = DEVICE_ACPI_HANDLE(&pdev->dev);
if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle))
acpi_slot_detected = 1;
@@ -109,11 +114,11 @@ static struct pcie_port_service_driver __initdata dummy_driver = {
static int __init select_detection_mode(void)
{
- struct slot *slot, *tmp;
+ struct dummy_slot *slot, *tmp;
pcie_port_service_register(&dummy_driver);
pcie_port_service_unregister(&dummy_driver);
- list_for_each_entry_safe(slot, tmp, &dummy_slots, slot_list) {
- list_del(&slot->slot_list);
+ list_for_each_entry_safe(slot, tmp, &dummy_slots, list) {
+ list_del(&slot->list);
kfree(slot);
}
if (acpi_slot_detected && dup_slot_id)
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 2317557fdee..bc234719b1d 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -99,65 +99,55 @@ static void release_slot(struct hotplug_slot *hotplug_slot)
kfree(hotplug_slot);
}
-static int init_slots(struct controller *ctrl)
+static int init_slot(struct controller *ctrl)
{
- struct slot *slot;
- struct hotplug_slot *hotplug_slot;
- struct hotplug_slot_info *info;
+ struct slot *slot = ctrl->slot;
+ struct hotplug_slot *hotplug = NULL;
+ struct hotplug_slot_info *info = NULL;
char name[SLOT_NAME_SIZE];
int retval = -ENOMEM;
- list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
- hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL);
- if (!hotplug_slot)
- goto error;
-
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- goto error_hpslot;
-
- /* register this slot with the hotplug pci core */
- hotplug_slot->info = info;
- hotplug_slot->private = slot;
- hotplug_slot->release = &release_slot;
- hotplug_slot->ops = &pciehp_hotplug_slot_ops;
- slot->hotplug_slot = hotplug_slot;
- snprintf(name, SLOT_NAME_SIZE, "%u", slot->number);
-
- ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x "
- "hp_slot=%x sun=%x slot_device_offset=%x\n",
- pci_domain_nr(ctrl->pci_dev->subordinate),
- slot->bus, slot->device, slot->hp_slot, slot->number,
- ctrl->slot_device_offset);
- retval = pci_hp_register(hotplug_slot,
- ctrl->pci_dev->subordinate,
- slot->device,
- name);
- if (retval) {
- ctrl_err(ctrl, "pci_hp_register failed with error %d\n",
- retval);
- goto error_info;
- }
- get_power_status(hotplug_slot, &info->power_status);
- get_attention_status(hotplug_slot, &info->attention_status);
- get_latch_status(hotplug_slot, &info->latch_status);
- get_adapter_status(hotplug_slot, &info->adapter_status);
+ hotplug = kzalloc(sizeof(*hotplug), GFP_KERNEL);
+ if (!hotplug)
+ goto out;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ goto out;
+
+ /* register this slot with the hotplug pci core */
+ hotplug->info = info;
+ hotplug->private = slot;
+ hotplug->release = &release_slot;
+ hotplug->ops = &pciehp_hotplug_slot_ops;
+ slot->hotplug_slot = hotplug;
+ snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
+
+ ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:00 sun=%x\n",
+ pci_domain_nr(ctrl->pcie->port->subordinate),
+ ctrl->pcie->port->subordinate->number, PSN(ctrl));
+ retval = pci_hp_register(hotplug,
+ ctrl->pcie->port->subordinate, 0, name);
+ if (retval) {
+ ctrl_err(ctrl,
+ "pci_hp_register failed with error %d\n", retval);
+ goto out;
+ }
+ get_power_status(hotplug, &info->power_status);
+ get_attention_status(hotplug, &info->attention_status);
+ get_latch_status(hotplug, &info->latch_status);
+ get_adapter_status(hotplug, &info->adapter_status);
+out:
+ if (retval) {
+ kfree(info);
+ kfree(hotplug);
}
-
- return 0;
-error_info:
- kfree(info);
-error_hpslot:
- kfree(hotplug_slot);
-error:
return retval;
}
-static void cleanup_slots(struct controller *ctrl)
+static void cleanup_slot(struct controller *ctrl)
{
- struct slot *slot;
- list_for_each_entry(slot, &ctrl->slot_list, slot_list)
- pci_hp_deregister(slot->hotplug_slot);
+ pci_hp_deregister(ctrl->slot->hotplug_slot);
}
/*
@@ -173,7 +163,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
hotplug_slot->info->attention_status = status;
if (ATTN_LED(slot->ctrl))
- slot->hpc_ops->set_attention_status(slot, status);
+ pciehp_set_attention_status(slot, status);
return 0;
}
@@ -208,7 +198,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_power_status(slot, value);
+ retval = pciehp_get_power_status(slot, value);
if (retval < 0)
*value = hotplug_slot->info->power_status;
@@ -223,7 +213,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_attention_status(slot, value);
+ retval = pciehp_get_attention_status(slot, value);
if (retval < 0)
*value = hotplug_slot->info->attention_status;
@@ -238,7 +228,7 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_latch_status(slot, value);
+ retval = pciehp_get_latch_status(slot, value);
if (retval < 0)
*value = hotplug_slot->info->latch_status;
@@ -253,7 +243,7 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_adapter_status(slot, value);
+ retval = pciehp_get_adapter_status(slot, value);
if (retval < 0)
*value = hotplug_slot->info->adapter_status;
@@ -269,7 +259,7 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_max_bus_speed(slot, value);
+ retval = pciehp_get_max_link_speed(slot, value);
if (retval < 0)
*value = PCI_SPEED_UNKNOWN;
@@ -284,7 +274,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_cur_bus_speed(slot, value);
+ retval = pciehp_get_cur_link_speed(slot, value);
if (retval < 0)
*value = PCI_SPEED_UNKNOWN;
@@ -295,7 +285,7 @@ static int pciehp_probe(struct pcie_device *dev)
{
int rc;
struct controller *ctrl;
- struct slot *t_slot;
+ struct slot *slot;
u8 value;
struct pci_dev *pdev = dev->port;
@@ -314,7 +304,7 @@ static int pciehp_probe(struct pcie_device *dev)
set_service_data(dev, ctrl);
/* Setup the slot information structures */
- rc = init_slots(ctrl);
+ rc = init_slot(ctrl);
if (rc) {
if (rc == -EBUSY)
ctrl_warn(ctrl, "Slot already registered by another "
@@ -332,15 +322,15 @@ static int pciehp_probe(struct pcie_device *dev)
}
/* Check if slot is occupied */
- t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
- t_slot->hpc_ops->get_adapter_status(t_slot, &value);
+ slot = ctrl->slot;
+ pciehp_get_adapter_status(slot, &value);
if (value) {
if (pciehp_force)
- pciehp_enable_slot(t_slot);
+ pciehp_enable_slot(slot);
} else {
/* Power off slot if not occupied */
if (POWER_CTRL(ctrl)) {
- rc = t_slot->hpc_ops->power_off_slot(t_slot);
+ rc = pciehp_power_off_slot(slot);
if (rc)
goto err_out_free_ctrl_slot;
}
@@ -349,19 +339,19 @@ static int pciehp_probe(struct pcie_device *dev)
return 0;
err_out_free_ctrl_slot:
- cleanup_slots(ctrl);
+ cleanup_slot(ctrl);
err_out_release_ctlr:
- ctrl->hpc_ops->release_ctlr(ctrl);
+ pciehp_release_ctrl(ctrl);
err_out_none:
return -ENODEV;
}
-static void pciehp_remove (struct pcie_device *dev)
+static void pciehp_remove(struct pcie_device *dev)
{
struct controller *ctrl = get_service_data(dev);
- cleanup_slots(ctrl);
- ctrl->hpc_ops->release_ctlr(ctrl);
+ cleanup_slot(ctrl);
+ pciehp_release_ctrl(ctrl);
}
#ifdef CONFIG_PM
@@ -376,20 +366,20 @@ static int pciehp_resume (struct pcie_device *dev)
dev_info(&dev->device, "%s ENTRY\n", __func__);
if (pciehp_force) {
struct controller *ctrl = get_service_data(dev);
- struct slot *t_slot;
+ struct slot *slot;
u8 status;
/* reinitialize the chipset's event detection logic */
pcie_enable_notification(ctrl);
- t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
+ slot = ctrl->slot;
/* Check if slot is occupied */
- t_slot->hpc_ops->get_adapter_status(t_slot, &status);
+ pciehp_get_adapter_status(slot, &status);
if (status)
- pciehp_enable_slot(t_slot);
+ pciehp_enable_slot(slot);
else
- pciehp_disable_slot(t_slot);
+ pciehp_disable_slot(slot);
}
return 0;
}
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index b97cb4c3e0f..84487d126e4 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -82,7 +82,7 @@ u8 pciehp_handle_switch_change(struct slot *p_slot)
/* Switch Change */
ctrl_dbg(ctrl, "Switch interrupt received\n");
- p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+ pciehp_get_latch_status(p_slot, &getstatus);
if (getstatus) {
/*
* Switch opened
@@ -114,7 +114,7 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
/* Switch is open, assume a presence change
* Save the presence state
*/
- p_slot->hpc_ops->get_adapter_status(p_slot, &presence_save);
+ pciehp_get_adapter_status(p_slot, &presence_save);
if (presence_save) {
/*
* Card Present
@@ -143,7 +143,7 @@ u8 pciehp_handle_power_fault(struct slot *p_slot)
/* power fault */
ctrl_dbg(ctrl, "Power fault interrupt received\n");
- if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) {
+ if (!pciehp_query_power_fault(p_slot)) {
/*
* power fault Cleared
*/
@@ -172,7 +172,7 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
{
/* turn off slot, turn on Amber LED, turn off Green LED if supported*/
if (POWER_CTRL(ctrl)) {
- if (pslot->hpc_ops->power_off_slot(pslot)) {
+ if (pciehp_power_off_slot(pslot)) {
ctrl_err(ctrl,
"Issue of Slot Power Off command failed\n");
return;
@@ -186,10 +186,10 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
}
if (PWR_LED(ctrl))
- pslot->hpc_ops->green_led_off(pslot);
+ pciehp_green_led_off(pslot);
if (ATTN_LED(ctrl)) {
- if (pslot->hpc_ops->set_attention_status(pslot, 1)) {
+ if (pciehp_set_attention_status(pslot, 1)) {
ctrl_err(ctrl,
"Issue of Set Attention Led command failed\n");
return;
@@ -208,24 +208,20 @@ static int board_added(struct slot *p_slot)
{
int retval = 0;
struct controller *ctrl = p_slot->ctrl;
- struct pci_bus *parent = ctrl->pci_dev->subordinate;
-
- ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d, %d\n",
- __func__, p_slot->device, ctrl->slot_device_offset,
- p_slot->hp_slot);
+ struct pci_bus *parent = ctrl->pcie->port->subordinate;
if (POWER_CTRL(ctrl)) {
/* Power on slot */
- retval = p_slot->hpc_ops->power_on_slot(p_slot);
+ retval = pciehp_power_on_slot(p_slot);
if (retval)
return retval;
}
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_blink(p_slot);
+ pciehp_green_led_blink(p_slot);
/* Check link training status */
- retval = p_slot->hpc_ops->check_lnk_status(ctrl);
+ retval = pciehp_check_link_status(ctrl);
if (retval) {
ctrl_err(ctrl, "Failed to check link status\n");
set_slot_off(ctrl, p_slot);
@@ -233,21 +229,21 @@ static int board_added(struct slot *p_slot)
}
/* Check for a power fault */
- if (p_slot->hpc_ops->query_power_fault(p_slot)) {
+ if (pciehp_query_power_fault(p_slot)) {
ctrl_dbg(ctrl, "Power fault detected\n");
- retval = POWER_FAILURE;
+ retval = -EIO;
goto err_exit;
}
retval = pciehp_configure_device(p_slot);
if (retval) {
- ctrl_err(ctrl, "Cannot add device at %04x:%02x:%02x\n",
- pci_domain_nr(parent), p_slot->bus, p_slot->device);
+ ctrl_err(ctrl, "Cannot add device at %04x:%02x:00\n",
+ pci_domain_nr(parent), parent->number);
goto err_exit;
}
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_on(p_slot);
+ pciehp_green_led_on(p_slot);
return 0;
@@ -269,11 +265,9 @@ static int remove_board(struct slot *p_slot)
if (retval)
return retval;
- ctrl_dbg(ctrl, "%s: hp_slot = %d\n", __func__, p_slot->hp_slot);
-
if (POWER_CTRL(ctrl)) {
/* power off slot */
- retval = p_slot->hpc_ops->power_off_slot(p_slot);
+ retval = pciehp_power_off_slot(p_slot);
if (retval) {
ctrl_err(ctrl,
"Issue of Slot Disable command failed\n");
@@ -287,9 +281,9 @@ static int remove_board(struct slot *p_slot)
msleep(1000);
}
+ /* turn off Green LED */
if (PWR_LED(ctrl))
- /* turn off Green LED */
- p_slot->hpc_ops->green_led_off(p_slot);
+ pciehp_green_led_off(p_slot);
return 0;
}
@@ -317,18 +311,17 @@ static void pciehp_power_thread(struct work_struct *work)
case POWEROFF_STATE:
mutex_unlock(&p_slot->lock);
ctrl_dbg(p_slot->ctrl,
- "Disabling domain:bus:device=%04x:%02x:%02x\n",
- pci_domain_nr(p_slot->ctrl->pci_dev->subordinate),
- p_slot->bus, p_slot->device);
+ "Disabling domain:bus:device=%04x:%02x:00\n",
+ pci_domain_nr(p_slot->ctrl->pcie->port->subordinate),
+ p_slot->ctrl->pcie->port->subordinate->number);
pciehp_disable_slot(p_slot);
mutex_lock(&p_slot->lock);
p_slot->state = STATIC_STATE;
break;
case POWERON_STATE:
mutex_unlock(&p_slot->lock);
- if (pciehp_enable_slot(p_slot) &&
- PWR_LED(p_slot->ctrl))
- p_slot->hpc_ops->green_led_off(p_slot);
+ if (pciehp_enable_slot(p_slot) && PWR_LED(p_slot->ctrl))
+ pciehp_green_led_off(p_slot);
mutex_lock(&p_slot->lock);
p_slot->state = STATIC_STATE;
break;
@@ -379,10 +372,10 @@ static int update_slot_info(struct slot *slot)
if (!info)
return -ENOMEM;
- slot->hpc_ops->get_power_status(slot, &(info->power_status));
- slot->hpc_ops->get_attention_status(slot, &(info->attention_status));
- slot->hpc_ops->get_latch_status(slot, &(info->latch_status));
- slot->hpc_ops->get_adapter_status(slot, &(info->adapter_status));
+ pciehp_get_power_status(slot, &info->power_status);
+ pciehp_get_attention_status(slot, &info->attention_status);
+ pciehp_get_latch_status(slot, &info->latch_status);
+ pciehp_get_adapter_status(slot, &info->adapter_status);
result = pci_hp_change_slot_info(slot->hotplug_slot, info);
kfree (info);
@@ -399,7 +392,7 @@ static void handle_button_press_event(struct slot *p_slot)
switch (p_slot->state) {
case STATIC_STATE:
- p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+ pciehp_get_power_status(p_slot, &getstatus);
if (getstatus) {
p_slot->state = BLINKINGOFF_STATE;
ctrl_info(ctrl,
@@ -413,9 +406,9 @@ static void handle_button_press_event(struct slot *p_slot)
}
/* blink green LED and turn off amber */
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_blink(p_slot);
+ pciehp_green_led_blink(p_slot);
if (ATTN_LED(ctrl))
- p_slot->hpc_ops->set_attention_status(p_slot, 0);
+ pciehp_set_attention_status(p_slot, 0);
schedule_delayed_work(&p_slot->work, 5*HZ);
break;
@@ -430,13 +423,13 @@ static void handle_button_press_event(struct slot *p_slot)
cancel_delayed_work(&p_slot->work);
if (p_slot->state == BLINKINGOFF_STATE) {
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_on(p_slot);
+ pciehp_green_led_on(p_slot);
} else {
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_off(p_slot);
+ pciehp_green_led_off(p_slot);
}
if (ATTN_LED(ctrl))
- p_slot->hpc_ops->set_attention_status(p_slot, 0);
+ pciehp_set_attention_status(p_slot, 0);
ctrl_info(ctrl, "PCI slot #%s - action canceled "
"due to button press\n", slot_name(p_slot));
p_slot->state = STATIC_STATE;
@@ -474,7 +467,7 @@ static void handle_surprise_event(struct slot *p_slot)
info->p_slot = p_slot;
INIT_WORK(&info->work, pciehp_power_thread);
- p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+ pciehp_get_adapter_status(p_slot, &getstatus);
if (!getstatus)
p_slot->state = POWEROFF_STATE;
else
@@ -498,9 +491,9 @@ static void interrupt_event_handler(struct work_struct *work)
if (!POWER_CTRL(ctrl))
break;
if (ATTN_LED(ctrl))
- p_slot->hpc_ops->set_attention_status(p_slot, 1);
+ pciehp_set_attention_status(p_slot, 1);
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_off(p_slot);
+ pciehp_green_led_off(p_slot);
break;
case INT_PRESENCE_ON:
case INT_PRESENCE_OFF:
@@ -525,45 +518,38 @@ int pciehp_enable_slot(struct slot *p_slot)
int rc;
struct controller *ctrl = p_slot->ctrl;
- /* Check to see if (latch closed, card present, power off) */
- mutex_lock(&p_slot->ctrl->crit_sect);
-
- rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+ rc = pciehp_get_adapter_status(p_slot, &getstatus);
if (rc || !getstatus) {
ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -ENODEV;
}
if (MRL_SENS(p_slot->ctrl)) {
- rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+ rc = pciehp_get_latch_status(p_slot, &getstatus);
if (rc || getstatus) {
ctrl_info(ctrl, "Latch open on slot(%s)\n",
slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -ENODEV;
}
}
if (POWER_CTRL(p_slot->ctrl)) {
- rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+ rc = pciehp_get_power_status(p_slot, &getstatus);
if (rc || getstatus) {
ctrl_info(ctrl, "Already enabled on slot(%s)\n",
slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -EINVAL;
}
}
- p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+ pciehp_get_latch_status(p_slot, &getstatus);
rc = board_added(p_slot);
if (rc) {
- p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+ pciehp_get_latch_status(p_slot, &getstatus);
}
update_slot_info(p_slot);
- mutex_unlock(&p_slot->ctrl->crit_sect);
return rc;
}
@@ -577,35 +563,29 @@ int pciehp_disable_slot(struct slot *p_slot)
if (!p_slot->ctrl)
return 1;
- /* Check to see if (latch closed, card present, power on) */
- mutex_lock(&p_slot->ctrl->crit_sect);
-
if (!HP_SUPR_RM(p_slot->ctrl)) {
- ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+ ret = pciehp_get_adapter_status(p_slot, &getstatus);
if (ret || !getstatus) {
ctrl_info(ctrl, "No adapter on slot(%s)\n",
slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -ENODEV;
}
}
if (MRL_SENS(p_slot->ctrl)) {
- ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+ ret = pciehp_get_latch_status(p_slot, &getstatus);
if (ret || getstatus) {
ctrl_info(ctrl, "Latch open on slot(%s)\n",
slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -ENODEV;
}
}
if (POWER_CTRL(p_slot->ctrl)) {
- ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+ ret = pciehp_get_power_status(p_slot, &getstatus);
if (ret || !getstatus) {
ctrl_info(ctrl, "Already disabled on slot(%s)\n",
slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -EINVAL;
}
}
@@ -613,7 +593,6 @@ int pciehp_disable_slot(struct slot *p_slot)
ret = remove_board(p_slot);
update_slot_info(p_slot);
- mutex_unlock(&p_slot->ctrl->crit_sect);
return ret;
}
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 271f917b6f2..9ef4605c1ef 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -44,25 +44,25 @@ static atomic_t pciehp_num_controllers = ATOMIC_INIT(0);
static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
return pci_read_config_word(dev, ctrl->cap_base + reg, value);
}
static inline int pciehp_readl(struct controller *ctrl, int reg, u32 *value)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
return pci_read_config_dword(dev, ctrl->cap_base + reg, value);
}
static inline int pciehp_writew(struct controller *ctrl, int reg, u16 value)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
return pci_write_config_word(dev, ctrl->cap_base + reg, value);
}
static inline int pciehp_writel(struct controller *ctrl, int reg, u32 value)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
return pci_write_config_dword(dev, ctrl->cap_base + reg, value);
}
@@ -266,7 +266,7 @@ static void pcie_wait_link_active(struct controller *ctrl)
ctrl_dbg(ctrl, "Data Link Layer Link Active not set in 1000 msec\n");
}
-static int hpc_check_lnk_status(struct controller *ctrl)
+int pciehp_check_link_status(struct controller *ctrl)
{
u16 lnk_status;
int retval = 0;
@@ -305,7 +305,7 @@ static int hpc_check_lnk_status(struct controller *ctrl)
return retval;
}
-static int hpc_get_attention_status(struct slot *slot, u8 *status)
+int pciehp_get_attention_status(struct slot *slot, u8 *status)
{
struct controller *ctrl = slot->ctrl;
u16 slot_ctrl;
@@ -344,7 +344,7 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status)
return 0;
}
-static int hpc_get_power_status(struct slot *slot, u8 *status)
+int pciehp_get_power_status(struct slot *slot, u8 *status)
{
struct controller *ctrl = slot->ctrl;
u16 slot_ctrl;
@@ -376,7 +376,7 @@ static int hpc_get_power_status(struct slot *slot, u8 *status)
return retval;
}
-static int hpc_get_latch_status(struct slot *slot, u8 *status)
+int pciehp_get_latch_status(struct slot *slot, u8 *status)
{
struct controller *ctrl = slot->ctrl;
u16 slot_status;
@@ -392,7 +392,7 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status)
return 0;
}
-static int hpc_get_adapter_status(struct slot *slot, u8 *status)
+int pciehp_get_adapter_status(struct slot *slot, u8 *status)
{
struct controller *ctrl = slot->ctrl;
u16 slot_status;
@@ -408,7 +408,7 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status)
return 0;
}
-static int hpc_query_power_fault(struct slot *slot)
+int pciehp_query_power_fault(struct slot *slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_status;
@@ -422,7 +422,7 @@ static int hpc_query_power_fault(struct slot *slot)
return !!(slot_status & PCI_EXP_SLTSTA_PFD);
}
-static int hpc_set_attention_status(struct slot *slot, u8 value)
+int pciehp_set_attention_status(struct slot *slot, u8 value)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
@@ -450,7 +450,7 @@ static int hpc_set_attention_status(struct slot *slot, u8 value)
return rc;
}
-static void hpc_set_green_led_on(struct slot *slot)
+void pciehp_green_led_on(struct slot *slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
@@ -463,7 +463,7 @@ static void hpc_set_green_led_on(struct slot *slot)
__func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
}
-static void hpc_set_green_led_off(struct slot *slot)
+void pciehp_green_led_off(struct slot *slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
@@ -476,7 +476,7 @@ static void hpc_set_green_led_off(struct slot *slot)
__func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
}
-static void hpc_set_green_led_blink(struct slot *slot)
+void pciehp_green_led_blink(struct slot *slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
@@ -489,7 +489,7 @@ static void hpc_set_green_led_blink(struct slot *slot)
__func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
}
-static int hpc_power_on_slot(struct slot * slot)
+int pciehp_power_on_slot(struct slot * slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
@@ -497,8 +497,6 @@ static int hpc_power_on_slot(struct slot * slot)
u16 slot_status;
int retval = 0;
- ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
-
/* Clear sticky power-fault bit from previous power failures */
retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
if (retval) {
@@ -539,7 +537,7 @@ static int hpc_power_on_slot(struct slot * slot)
static inline int pcie_mask_bad_dllp(struct controller *ctrl)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
int pos;
u32 reg;
@@ -556,7 +554,7 @@ static inline int pcie_mask_bad_dllp(struct controller *ctrl)
static inline void pcie_unmask_bad_dllp(struct controller *ctrl)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
u32 reg;
int pos;
@@ -570,7 +568,7 @@ static inline void pcie_unmask_bad_dllp(struct controller *ctrl)
pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg);
}
-static int hpc_power_off_slot(struct slot * slot)
+int pciehp_power_off_slot(struct slot * slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
@@ -578,8 +576,6 @@ static int hpc_power_off_slot(struct slot * slot)
int retval = 0;
int changed;
- ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
-
/*
* Set Bad DLLP Mask bit in Correctable Error Mask
* Register. This is the workaround against Bad DLLP error
@@ -614,8 +610,8 @@ static int hpc_power_off_slot(struct slot * slot)
static irqreturn_t pcie_isr(int irq, void *dev_id)
{
struct controller *ctrl = (struct controller *)dev_id;
+ struct slot *slot = ctrl->slot;
u16 detected, intr_loc;
- struct slot *p_slot;
/*
* In order to guarantee that all interrupt events are
@@ -656,29 +652,27 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
if (!(intr_loc & ~PCI_EXP_SLTSTA_CC))
return IRQ_HANDLED;
- p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
-
/* Check MRL Sensor Changed */
if (intr_loc & PCI_EXP_SLTSTA_MRLSC)
- pciehp_handle_switch_change(p_slot);
+ pciehp_handle_switch_change(slot);
/* Check Attention Button Pressed */
if (intr_loc & PCI_EXP_SLTSTA_ABP)
- pciehp_handle_attention_button(p_slot);
+ pciehp_handle_attention_button(slot);
/* Check Presence Detect Changed */
if (intr_loc & PCI_EXP_SLTSTA_PDC)
- pciehp_handle_presence_change(p_slot);
+ pciehp_handle_presence_change(slot);
/* Check Power Fault Detected */
if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
ctrl->power_fault_detected = 1;
- pciehp_handle_power_fault(p_slot);
+ pciehp_handle_power_fault(slot);
}
return IRQ_HANDLED;
}
-static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
+int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *value)
{
struct controller *ctrl = slot->ctrl;
enum pcie_link_speed lnk_speed;
@@ -709,7 +703,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
return retval;
}
-static int hpc_get_max_lnk_width(struct slot *slot,
+int pciehp_get_max_lnk_width(struct slot *slot,
enum pcie_link_width *value)
{
struct controller *ctrl = slot->ctrl;
@@ -759,7 +753,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
return retval;
}
-static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
+int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *value)
{
struct controller *ctrl = slot->ctrl;
enum pcie_link_speed lnk_speed = PCI_SPEED_UNKNOWN;
@@ -791,7 +785,7 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
return retval;
}
-static int hpc_get_cur_lnk_width(struct slot *slot,
+int pciehp_get_cur_lnk_width(struct slot *slot,
enum pcie_link_width *value)
{
struct controller *ctrl = slot->ctrl;
@@ -842,30 +836,6 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
return retval;
}
-static void pcie_release_ctrl(struct controller *ctrl);
-static struct hpc_ops pciehp_hpc_ops = {
- .power_on_slot = hpc_power_on_slot,
- .power_off_slot = hpc_power_off_slot,
- .set_attention_status = hpc_set_attention_status,
- .get_power_status = hpc_get_power_status,
- .get_attention_status = hpc_get_attention_status,
- .get_latch_status = hpc_get_latch_status,
- .get_adapter_status = hpc_get_adapter_status,
-
- .get_max_bus_speed = hpc_get_max_lnk_speed,
- .get_cur_bus_speed = hpc_get_cur_lnk_speed,
- .get_max_lnk_width = hpc_get_max_lnk_width,
- .get_cur_lnk_width = hpc_get_cur_lnk_width,
-
- .query_power_fault = hpc_query_power_fault,
- .green_led_on = hpc_set_green_led_on,
- .green_led_off = hpc_set_green_led_off,
- .green_led_blink = hpc_set_green_led_blink,
-
- .release_ctlr = pcie_release_ctrl,
- .check_lnk_status = hpc_check_lnk_status,
-};
-
int pcie_enable_notification(struct controller *ctrl)
{
u16 cmd, mask;
@@ -930,23 +900,16 @@ static int pcie_init_slot(struct controller *ctrl)
if (!slot)
return -ENOMEM;
- slot->hp_slot = 0;
slot->ctrl = ctrl;
- slot->bus = ctrl->pci_dev->subordinate->number;
- slot->device = ctrl->slot_device_offset + slot->hp_slot;
- slot->hpc_ops = ctrl->hpc_ops;
- slot->number = ctrl->first_slot;
mutex_init(&slot->lock);
INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work);
- list_add(&slot->slot_list, &ctrl->slot_list);
+ ctrl->slot = slot;
return 0;
}
static void pcie_cleanup_slot(struct controller *ctrl)
{
- struct slot *slot;
- slot = list_first_entry(&ctrl->slot_list, struct slot, slot_list);
- list_del(&slot->slot_list);
+ struct slot *slot = ctrl->slot;
cancel_delayed_work(&slot->work);
flush_scheduled_work();
flush_workqueue(pciehp_wq);
@@ -957,7 +920,7 @@ static inline void dbg_ctrl(struct controller *ctrl)
{
int i;
u16 reg16;
- struct pci_dev *pdev = ctrl->pci_dev;
+ struct pci_dev *pdev = ctrl->pcie->port;
if (!pciehp_debug)
return;
@@ -980,7 +943,7 @@ static inline void dbg_ctrl(struct controller *ctrl)
(unsigned long long)pci_resource_start(pdev, i));
}
ctrl_info(ctrl, "Slot Capabilities : 0x%08x\n", ctrl->slot_cap);
- ctrl_info(ctrl, " Physical Slot Number : %d\n", ctrl->first_slot);
+ ctrl_info(ctrl, " Physical Slot Number : %d\n", PSN(ctrl));
ctrl_info(ctrl, " Attention Button : %3s\n",
ATTN_BUTTN(ctrl) ? "yes" : "no");
ctrl_info(ctrl, " Power Controller : %3s\n",
@@ -1014,10 +977,7 @@ struct controller *pcie_init(struct pcie_device *dev)
dev_err(&dev->device, "%s: Out of memory\n", __func__);
goto abort;
}
- INIT_LIST_HEAD(&ctrl->slot_list);
-
ctrl->pcie = dev;
- ctrl->pci_dev = pdev;
ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
if (!ctrl->cap_base) {
ctrl_err(ctrl, "Cannot find PCI Express capability\n");
@@ -1029,11 +989,6 @@ struct controller *pcie_init(struct pcie_device *dev)
}
ctrl->slot_cap = slot_cap;
- ctrl->first_slot = slot_cap >> 19;
- ctrl->slot_device_offset = 0;
- ctrl->num_slots = 1;
- ctrl->hpc_ops = &pciehp_hpc_ops;
- mutex_init(&ctrl->crit_sect);
mutex_init(&ctrl->ctrl_lock);
init_waitqueue_head(&ctrl->queue);
dbg_ctrl(ctrl);
@@ -1089,7 +1044,7 @@ abort:
return NULL;
}
-void pcie_release_ctrl(struct controller *ctrl)
+void pciehp_release_ctrl(struct controller *ctrl)
{
pcie_shutdown_notification(ctrl);
pcie_cleanup_slot(ctrl);
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index 02e24d63b3e..21733108add 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -63,27 +63,27 @@ static int __ref pciehp_add_bridge(struct pci_dev *dev)
int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
+ struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
- dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0));
+ dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {
ctrl_err(ctrl, "Device %s already exists "
- "at %04x:%02x:%02x, cannot hot-add\n", pci_name(dev),
- pci_domain_nr(parent), p_slot->bus, p_slot->device);
+ "at %04x:%02x:00, cannot hot-add\n", pci_name(dev),
+ pci_domain_nr(parent), parent->number);
pci_dev_put(dev);
return -EINVAL;
}
- num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0));
+ num = pci_scan_slot(parent, PCI_DEVFN(0, 0));
if (num == 0) {
ctrl_err(ctrl, "No new device found\n");
return -ENODEV;
}
for (fn = 0; fn < 8; fn++) {
- dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, fn));
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
if (!dev)
continue;
if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
@@ -111,19 +111,18 @@ int pciehp_unconfigure_device(struct slot *p_slot)
int j;
u8 bctl = 0;
u8 presence = 0;
- struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
+ struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
u16 command;
struct controller *ctrl = p_slot->ctrl;
- ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:%02x\n",
- __func__, pci_domain_nr(parent), p_slot->bus, p_slot->device);
- ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence);
+ ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
+ __func__, pci_domain_nr(parent), parent->number);
+ ret = pciehp_get_adapter_status(p_slot, &presence);
if (ret)
presence = 0;
for (j = 0; j < 8; j++) {
- struct pci_dev* temp = pci_get_slot(parent,
- (p_slot->device << 3) | j);
+ struct pci_dev* temp = pci_get_slot(parent, PCI_DEVFN(0, j));
if (!temp)
continue;
if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 10c0e62bd5a..2ce8f9ccc66 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -318,6 +318,8 @@ static int __init aer_service_init(void)
{
if (pcie_aer_disable)
return -ENXIO;
+ if (!pci_msi_enabled())
+ return -ENXIO;
return pcie_port_service_register(&aerdriver);
}
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index f289ca9bf18..745402e8e49 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -303,9 +303,6 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev,
pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, &reg32);
info->support = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10;
- /* 00b and 10b are defined as "Reserved". */
- if (info->support == PCIE_LINK_STATE_L1)
- info->support = 0;
info->latency_encoding_l0s = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12;
info->latency_encoding_l1 = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15;
pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index fb45f5ee8df..454970d2d70 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -746,7 +746,9 @@ static acpi_status WMID_set_u32(u32 value, u32 cap, struct wmi_interface *iface)
return AE_BAD_PARAMETER;
if (quirks->mailled == 1) {
param = value ? 0x92 : 0x93;
+ i8042_lock_chip();
i8042_command(&param, 0x1059);
+ i8042_unlock_chip();
return 0;
}
break;
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index c431198bdbc..82daa3c1dc9 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -14,7 +14,6 @@
#include <linux/init.h>
#include <linux/miscdevice.h>
-#include <linux/utsname.h>
#include <linux/debugfs.h>
#include <asm/ipl.h>
#include <asm/sclp.h>
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 82b34893e5b..9a4dd5992f6 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -117,8 +117,6 @@ source "drivers/staging/vt6655/Kconfig"
source "drivers/staging/vt6656/Kconfig"
-source "drivers/staging/cpc-usb/Kconfig"
-
source "drivers/staging/udlfb/Kconfig"
source "drivers/staging/hv/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index b1cad0d9ba7..104f2f8897e 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -40,7 +40,6 @@ obj-$(CONFIG_USB_SERIAL_QUATECH_USB2) += quatech_usb2/
obj-$(CONFIG_OCTEON_ETHERNET) += octeon/
obj-$(CONFIG_VT6655) += vt6655/
obj-$(CONFIG_VT6656) += vt6656/
-obj-$(CONFIG_USB_CPC) += cpc-usb/
obj-$(CONFIG_FB_UDL) += udlfb/
obj-$(CONFIG_HYPERV) += hv/
obj-$(CONFIG_VME_BUS) += vme/
diff --git a/drivers/staging/cpc-usb/Kconfig b/drivers/staging/cpc-usb/Kconfig
deleted file mode 100644
index 2be0bc9c39d..00000000000
--- a/drivers/staging/cpc-usb/Kconfig
+++ /dev/null
@@ -1,4 +0,0 @@
-config USB_CPC
- tristate "CPC CAN USB driver"
- depends on USB && PROC_FS
- default n
diff --git a/drivers/staging/cpc-usb/Makefile b/drivers/staging/cpc-usb/Makefile
deleted file mode 100644
index 3f83170a8fa..00000000000
--- a/drivers/staging/cpc-usb/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_USB_CPC) += cpc-usb.o
-
-cpc-usb-y := cpc-usb_drv.o sja2m16c_2.o
diff --git a/drivers/staging/cpc-usb/TODO b/drivers/staging/cpc-usb/TODO
deleted file mode 100644
index 9b1752fb9cd..00000000000
--- a/drivers/staging/cpc-usb/TODO
+++ /dev/null
@@ -1,10 +0,0 @@
-Things to do for this driver to get merged into the main portion of the
-kernel:
- - checkpatch cleanups
- - sparse clean
- - remove proc code
- - tie into CAN socket interfaces if possible
- - figure out sane userspace api
- - use linux's error codes
-
-Send patches to Greg Kroah-Hartman <greg@kroah.com>
diff --git a/drivers/staging/cpc-usb/cpc-usb_drv.c b/drivers/staging/cpc-usb/cpc-usb_drv.c
deleted file mode 100644
index c5eca46996f..00000000000
--- a/drivers/staging/cpc-usb/cpc-usb_drv.c
+++ /dev/null
@@ -1,1184 +0,0 @@
-/*
- * CPC-USB CAN Interface Kernel Driver
- *
- * Copyright (C) 2004-2009 EMS Dr. Thomas Wuensche
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published
- * by the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/poll.h>
-#include <linux/smp_lock.h>
-#include <linux/completion.h>
-#include <asm/uaccess.h>
-#include <linux/usb.h>
-
-
-#include <linux/proc_fs.h>
-
-#include "cpc.h"
-
-#include "cpc_int.h"
-#include "cpcusb.h"
-
-#include "sja2m16c.h"
-
-/* Version Information */
-#define DRIVER_AUTHOR "Sebastian Haas <haas@ems-wuensche.com>"
-#define DRIVER_DESC "CPC-USB Driver for Linux Kernel 2.6"
-#define DRIVER_VERSION CPC_DRIVER_VERSION
-
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL v2");
-
-/* Define these values to match your devices */
-#define USB_CPCUSB_VENDOR_ID 0x12D6
-
-#define USB_CPCUSB_M16C_PRODUCT_ID 0x0888
-#define USB_CPCUSB_LPC2119_PRODUCT_ID 0x0444
-
-#define CPC_USB_PROC_DIR CPC_PROC_DIR "cpc-usb"
-
-static struct proc_dir_entry *procDir;
-static struct proc_dir_entry *procEntry;
-
-/* Module parameters */
-static int debug;
-module_param(debug, int, S_IRUGO);
-
-/* table of devices that work with this driver */
-static struct usb_device_id cpcusb_table[] = {
- {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_M16C_PRODUCT_ID)},
- {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_LPC2119_PRODUCT_ID)},
- {} /* Terminating entry */
-};
-
-MODULE_DEVICE_TABLE(usb, cpcusb_table);
-
-/* use to prevent kernel panic if driver is unloaded
- * while a programm has still open the device
- */
-DECLARE_WAIT_QUEUE_HEAD(rmmodWq);
-atomic_t useCount;
-
-static CPC_USB_T *CPCUSB_Table[CPC_USB_CARD_CNT] = { 0 };
-static unsigned int CPCUsbCnt;
-
-/* prevent races between open() and disconnect() */
-static DECLARE_MUTEX(disconnect_sem);
-
-/* local function prototypes */
-static ssize_t cpcusb_read(struct file *file, char *buffer, size_t count,
- loff_t *ppos);
-static ssize_t cpcusb_write(struct file *file, const char *buffer,
- size_t count, loff_t *ppos);
-static unsigned int cpcusb_poll(struct file *file, poll_table * wait);
-static int cpcusb_open(struct inode *inode, struct file *file);
-static int cpcusb_release(struct inode *inode, struct file *file);
-
-static int cpcusb_probe(struct usb_interface *interface,
- const struct usb_device_id *id);
-static void cpcusb_disconnect(struct usb_interface *interface);
-
-static void cpcusb_read_bulk_callback(struct urb *urb);
-static void cpcusb_write_bulk_callback(struct urb *urb);
-static void cpcusb_read_interrupt_callback(struct urb *urb);
-
-static int cpcusb_setup_intrep(CPC_USB_T *card);
-
-static struct file_operations cpcusb_fops = {
- /*
- * The owner field is part of the module-locking
- * mechanism. The idea is that the kernel knows
- * which module to increment the use-counter of
- * BEFORE it calls the device's open() function.
- * This also means that the kernel can decrement
- * the use-counter again before calling release()
- * or should the open() function fail.
- */
- .owner = THIS_MODULE,
-
- .read = cpcusb_read,
- .write = cpcusb_write,
- .poll = cpcusb_poll,
- .open = cpcusb_open,
- .release = cpcusb_release,
-};
-
-/*
- * usb class driver info in order to get a minor number from the usb core,
- * and to have the device registered with devfs and the driver core
- */
-static struct usb_class_driver cpcusb_class = {
- .name = "usb/cpc_usb%d",
- .fops = &cpcusb_fops,
- .minor_base = CPC_USB_BASE_MNR,
-};
-
-/* usb specific object needed to register this driver with the usb subsystem */
-static struct usb_driver cpcusb_driver = {
- .name = "cpc-usb",
- .probe = cpcusb_probe,
- .disconnect = cpcusb_disconnect,
- .id_table = cpcusb_table,
-};
-
-static int cpcusb_create_info_output(char *buf)
-{
- int i = 0, j;
-
- for (j = 0; j < CPC_USB_CARD_CNT; j++) {
- if (CPCUSB_Table[j]) {
- CPC_USB_T *card = CPCUSB_Table[j];
- CPC_CHAN_T *chan = card->chan;
-
- /* MINOR CHANNELNO BUSNO SLOTNO */
- i += sprintf(&buf[i], "%d %s\n", chan->minor,
- card->serialNumber);
- }
- }
-
- return i;
-}
-
-static int cpcusb_proc_read_info(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len = cpcusb_create_info_output(page);
-
- if (len <= off + count)
- *eof = 1;
- *start = page + off;
- len -= off;
- if (len > count)
- len = count;
- if (len < 0)
- len = 0;
-
- return len;
-}
-
-/*
- * Remove CPC-USB and cleanup
- */
-static inline void cpcusb_delete(CPC_USB_T *card)
-{
- if (card) {
- if (card->chan) {
- if (card->chan->buf)
- vfree(card->chan->buf);
-
- if (card->chan->CPCWait_q)
- kfree(card->chan->CPCWait_q);
-
- kfree(card->chan);
- }
-
- CPCUSB_Table[card->idx] = NULL;
- kfree(card);
- }
-}
-
-/*
- * setup the interrupt IN endpoint of a specific CPC-USB device
- */
-static int cpcusb_setup_intrep(CPC_USB_T *card)
-{
- int retval = 0;
- struct usb_endpoint_descriptor *ep;
-
- ep = &card->interface->altsetting[0].endpoint[card->num_intr_in].desc;
-
- card->intr_in_buffer[0] = 0;
- card->free_slots = 15; /* initial size */
-
- /* setup the urb */
- usb_fill_int_urb(card->intr_in_urb, card->udev,
- usb_rcvintpipe(card->udev, card->num_intr_in),
- card->intr_in_buffer,
- sizeof(card->intr_in_buffer),
- cpcusb_read_interrupt_callback,
- card,
- ep->bInterval);
-
- card->intr_in_urb->status = 0; /* needed! */
-
- /* submit the urb */
- retval = usb_submit_urb(card->intr_in_urb, GFP_KERNEL);
-
- if (retval)
- err("%s - failed submitting intr urb, error %d", __func__,
- retval);
-
- return retval;
-}
-
-static int cpcusb_open(struct inode *inode, struct file *file)
-{
- CPC_USB_T *card = NULL;
- struct usb_interface *interface;
- int subminor;
- int j, retval = 0;
-
- subminor = iminor(inode);
-
- /* prevent disconnects */
- down(&disconnect_sem);
-
- interface = usb_find_interface(&cpcusb_driver, subminor);
- if (!interface) {
- err("%s - error, can't find device for minor %d",
- __func__, subminor);
- retval = CPC_ERR_NO_INTERFACE_PRESENT;
- goto exit_no_device;
- }
-
- card = usb_get_intfdata(interface);
- if (!card) {
- retval = CPC_ERR_NO_INTERFACE_PRESENT;
- goto exit_no_device;
- }
-
- /* lock this device */
- down(&card->sem);
-
- /* increment our usage count for the driver */
- if (card->open) {
- dbg("device already opened");
- retval = CPC_ERR_CHANNEL_ALREADY_OPEN;
- goto exit_on_error;
- }
-
- /* save our object in the file's private structure */
- file->private_data = card;
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- usb_fill_bulk_urb(card->urbs[j].urb, card->udev,
- usb_rcvbulkpipe(card->udev, card->num_bulk_in),
- card->urbs[j].buffer, card->urbs[j].size,
- cpcusb_read_bulk_callback, card);
-
- retval = usb_submit_urb(card->urbs[j].urb, GFP_KERNEL);
-
- if (retval) {
- err("%s - failed submitting read urb, error %d",
- __func__, retval);
- retval = CPC_ERR_TRANSMISSION_FAILED;
- goto exit_on_error;
- }
- }
-
- info("%s - %d URB's submitted", __func__, j);
-
- ResetBuffer(card->chan);
-
- cpcusb_setup_intrep(card);
- card->open = 1;
-
- atomic_inc(&useCount);
-
-exit_on_error:
- /* unlock this device */
- up(&card->sem);
-
-exit_no_device:
- up(&disconnect_sem);
-
- return retval;
-}
-
-static unsigned int cpcusb_poll(struct file *file, poll_table * wait)
-{
- CPC_USB_T *card = (CPC_USB_T *) file->private_data;
- unsigned int retval = 0;
-
- if (!card) {
- err("%s - device object lost", __func__);
- return -EIO;
- }
-
- poll_wait(file, card->chan->CPCWait_q, wait);
-
- if (IsBufferNotEmpty(card->chan) || !(card->present))
- retval |= (POLLIN | POLLRDNORM);
-
- if (card->free_slots)
- retval |= (POLLOUT | POLLWRNORM);
-
- return retval;
-}
-
-static int cpcusb_release(struct inode *inode, struct file *file)
-{
- CPC_USB_T *card = (CPC_USB_T *) file->private_data;
- int j, retval = 0;
-
- if (card == NULL) {
- dbg("%s - object is NULL", __func__);
- return CPC_ERR_NO_INTERFACE_PRESENT;
- }
-
- /* lock our device */
- down(&card->sem);
-
- if (!card->open) {
- dbg("%s - device not opened", __func__);
- retval = CPC_ERR_NO_INTERFACE_PRESENT;
- goto exit_not_opened;
- }
-
- /* if device wasn't unplugged kill all urbs */
- if (card->present) {
- /* kill read urbs */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- usb_kill_urb(card->urbs[j].urb);
- }
-
- /* kill irq urb */
- usb_kill_urb(card->intr_in_urb);
-
- /* kill write urbs */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- if (atomic_read(&card->wrUrbs[j].busy)) {
- usb_kill_urb(card->wrUrbs[j].urb);
- wait_for_completion(&card->wrUrbs[j].finished);
- }
- }
- }
-
- atomic_dec(&useCount);
-
- /* last process detached */
- if (atomic_read(&useCount) == 0) {
- wake_up(&rmmodWq);
- }
-
- if (!card->present && card->open) {
- /* the device was unplugged before the file was released */
- up(&card->sem);
- cpcusb_delete(card);
- return 0;
- }
-
- card->open = 0;
-
-exit_not_opened:
- up(&card->sem);
-
- return 0;
-}
-
-static ssize_t cpcusb_read(struct file *file, char *buffer, size_t count,
- loff_t *ppos)
-{
- CPC_USB_T *card = (CPC_USB_T *) file->private_data;
- CPC_CHAN_T *chan;
- int retval = 0;
-
- if (count < sizeof(CPC_MSG_T))
- return CPC_ERR_UNKNOWN;
-
- /* check if can read from the given address */
- if (!access_ok(VERIFY_WRITE, buffer, count))
- return CPC_ERR_UNKNOWN;
-
- /* lock this object */
- down(&card->sem);
-
- /* verify that the device wasn't unplugged */
- if (!card->present) {
- up(&card->sem);
- return CPC_ERR_NO_INTERFACE_PRESENT;
- }
-
- if (IsBufferEmpty(card->chan)) {
- retval = 0;
- } else {
- chan = card->chan;
-
-#if 0
- /* convert LPC2119 params back to SJA1000 params */
- if (card->deviceRevision >= 0x0200
- && chan->buf[chan->oidx].type == CPC_MSG_T_CAN_PRMS) {
- LPC2119_TO_SJA1000_Params(&chan->buf[chan->oidx]);
- }
-#endif
-
- if (copy_to_user(buffer, &chan->buf[chan->oidx], count) != 0) {
- retval = CPC_ERR_IO_TRANSFER;
- } else {
- chan->oidx = (chan->oidx + 1) % CPC_MSG_BUF_CNT;
- chan->WnR = 1;
- retval = sizeof(CPC_MSG_T);
- }
- }
-/* spin_unlock_irqrestore(&card->slock, flags); */
-
- /* unlock the device */
- up(&card->sem);
-
- return retval;
-}
-
-#define SHIFT 1
-static inline void cpcusb_align_buffer_alignment(unsigned char *buf)
-{
- /* CPC-USB uploads packed bytes. */
- CPC_MSG_T *cpc = (CPC_MSG_T *) buf;
- unsigned int i;
-
- for (i = 0; i < cpc->length + (2 * sizeof(unsigned long)); i++) {
- ((unsigned char *) &cpc->msgid)[1 + i] =
- ((unsigned char *) &cpc->msgid)[1 + SHIFT + i];
- }
-}
-
-static int cpc_get_buffer_count(CPC_CHAN_T *chan)
-{
- /* check the buffer parameters */
- if (chan->iidx == chan->oidx)
- return !chan->WnR ? CPC_MSG_BUF_CNT : 0;
- else if (chan->iidx >= chan->oidx)
- return (chan->iidx - chan->oidx) % CPC_MSG_BUF_CNT;
-
- return (chan->iidx + CPC_MSG_BUF_CNT - chan->oidx) % CPC_MSG_BUF_CNT;
-}
-
-static ssize_t cpcusb_write(struct file *file, const char *buffer,
- size_t count, loff_t *ppos)
-{
- CPC_USB_T *card = (CPC_USB_T *) file->private_data;
- CPC_USB_WRITE_URB_T *wrUrb = NULL;
-
- ssize_t bytes_written = 0;
- int retval = 0;
- int j;
-
- unsigned char *obuf = NULL;
- unsigned char type = 0;
- CPC_MSG_T *info = NULL;
-
- dbg("%s - entered minor %d, count = %zu, present = %d",
- __func__, card->minor, count, card->present);
-
- if (count > sizeof(CPC_MSG_T))
- return CPC_ERR_UNKNOWN;
-
- /* check if can read from the given address */
- if (!access_ok(VERIFY_READ, buffer, count))
- return CPC_ERR_UNKNOWN;
-
- /* lock this object */
- down(&card->sem);
-
- /* verify that the device wasn't unplugged */
- if (!card->present) {
- retval = CPC_ERR_NO_INTERFACE_PRESENT;
- goto exit;
- }
-
- /* verify that we actually have some data to write */
- if (count == 0) {
- dbg("%s - write request of 0 bytes", __func__);
- goto exit;
- }
-
- if (card->free_slots <= 5) {
- info = (CPC_MSG_T *) buffer;
-
- if (info->type != CPC_CMD_T_CLEAR_CMD_QUEUE
- || card->free_slots <= 0) {
- dbg("%s - send buffer full please try again %d",
- __func__, card->free_slots);
- retval = CPC_ERR_CAN_NO_TRANSMIT_BUF;
- goto exit;
- }
- }
-
- /* Find a free write urb */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- if (!atomic_read(&card->wrUrbs[j].busy)) {
- wrUrb = &card->wrUrbs[j]; /* remember found URB */
- atomic_set(&wrUrb->busy, 1); /* lock this URB */
- init_completion(&wrUrb->finished); /* init completion */
- dbg("WR URB no. %d started", j);
- break;
- }
- }
-
- /* don't found write urb say error */
- if (!wrUrb) {
- dbg("%s - no free send urb available", __func__);
- retval = CPC_ERR_CAN_NO_TRANSMIT_BUF;
- goto exit;
- }
- dbg("URB write req");
-
- obuf = (unsigned char *) wrUrb->urb->transfer_buffer;
-
- /* copy the data from userspace into our transfer buffer;
- * this is the only copy required.
- */
- if (copy_from_user(&obuf[4], buffer, count) != 0) {
- atomic_set(&wrUrb->busy, 0); /* release urb */
- retval = CPC_ERR_IO_TRANSFER;
- goto exit;
- }
-
- /* check if it is a DRIVER information message, so we can
- * response to that message and not the USB
- */
- info = (CPC_MSG_T *) &obuf[4];
-
- bytes_written = 11 + info->length;
- if (bytes_written >= wrUrb->size) {
- retval = CPC_ERR_IO_TRANSFER;
- goto exit;
- }
-
- switch (info->type) {
- case CPC_CMD_T_CLEAR_MSG_QUEUE:
- ResetBuffer(card->chan);
- break;
-
- case CPC_CMD_T_INQ_MSG_QUEUE_CNT:
- retval = cpc_get_buffer_count(card->chan);
- atomic_set(&wrUrb->busy, 0);
-
- goto exit;
-
- case CPC_CMD_T_INQ_INFO:
- if (info->msg.info.source == CPC_INFOMSG_T_DRIVER) {
- /* release urb cause we'll use it for driver
- * information
- */
- atomic_set(&wrUrb->busy, 0);
- if (IsBufferFull(card->chan)) {
- retval = CPC_ERR_IO_TRANSFER;
- goto exit;
- }
-
- /* it is a driver information request message and we have
- * free rx slots to store the response
- */
- type = info->msg.info.type;
- info = &card->chan->buf[card->chan->iidx];
-
- info->type = CPC_MSG_T_INFO;
- info->msg.info.source = CPC_INFOMSG_T_DRIVER;
- info->msg.info.type = type;
-
- switch (type) {
- case CPC_INFOMSG_T_VERSION:
- info->length = strlen(CPC_DRIVER_VERSION) + 2;
- sprintf(info->msg.info.msg, "%s\n",
- CPC_DRIVER_VERSION);
- break;
-
- case CPC_INFOMSG_T_SERIAL:
- info->length = strlen(CPC_DRIVER_SERIAL) + 2;
- sprintf(info->msg.info.msg, "%s\n",
- CPC_DRIVER_SERIAL);
- break;
-
- default:
- info->length = 2;
- info->msg.info.type =
- CPC_INFOMSG_T_UNKNOWN_TYPE;
- }
-
- card->chan->WnR = 0;
- card->chan->iidx =
- (card->chan->iidx + 1) % CPC_MSG_BUF_CNT;
-
- retval = info->length;
- goto exit;
- }
- break;
- case CPC_CMD_T_CAN_PRMS:
- /* Check the controller type. If it's the new CPC-USB, make sure if these are SJA1000 params */
- if (info->msg.canparams.cc_type != SJA1000
- && info->msg.canparams.cc_type != M16C_BASIC
- && (card->productId == USB_CPCUSB_LPC2119_PRODUCT_ID
- && info->msg.canparams.cc_type != SJA1000)) {
- /* don't forget to release the urb */
- atomic_set(&wrUrb->busy, 0);
- retval = CPC_ERR_WRONG_CONTROLLER_TYPE;
- goto exit;
- }
- break;
- }
-
- /* just convert the params if it is an old CPC-USB with M16C controller */
- if (card->productId == USB_CPCUSB_M16C_PRODUCT_ID) {
- /* if it is a parameter message convert it from SJA1000 controller
- * settings to M16C Basic controller settings
- */
- SJA1000_TO_M16C_BASIC_Params((CPC_MSG_T *) &obuf[4]);
- }
-
- /* don't forget the byte alignment */
- cpcusb_align_buffer_alignment(&obuf[4]);
-
- /* setup a the 4 byte header */
- obuf[0] = obuf[1] = obuf[2] = obuf[3] = 0;
-
- /* this urb was already set up, except for this write size */
- wrUrb->urb->transfer_buffer_length = bytes_written + 4;
-
- /* send the data out the bulk port */
- /* a character device write uses GFP_KERNEL,
- unless a spinlock is held */
- retval = usb_submit_urb(wrUrb->urb, GFP_KERNEL);
- if (retval) {
- atomic_set(&wrUrb->busy, 0); /* release urb */
- err("%s - failed submitting write urb, error %d",
- __func__, retval);
- } else {
- retval = bytes_written;
- }
-
-exit:
- /* unlock the device */
- up(&card->sem);
-
- dbg("%s - leaved", __func__);
-
- return retval;
-}
-
-/*
- * callback for interrupt IN urb
- *
- * On a successful completion the second byte of card->intr_in_buffer is
- * stored in card->free_slots (the value cpcusb_write() tests before it
- * queues a command). The URB is then resubmitted and sleepers on the
- * channel wait queue are woken. If the card is no longer present, or the
- * URB was killed, the callback returns without resubmitting.
- */
-static void cpcusb_read_interrupt_callback(struct urb *urb)
-{
- CPC_USB_T *card = (CPC_USB_T *) urb->context;
- int retval;
- unsigned long flags;
-
- /* NOTE(review): unlike the bulk IN callback, card is not NULL-checked */
- spin_lock_irqsave(&card->slock, flags);
-
- if (!card->present) {
- spin_unlock_irqrestore(&card->slock, flags);
- info("%s - no such device", __func__);
- return;
- }
-
- switch (urb->status) {
- case 0: /* success */
- /* presumably intr_in_buffer is this URB's transfer buffer - TODO confirm */
- card->free_slots = card->intr_in_buffer[1];
- break;
- case -ECONNRESET:
- case -ENOENT:
- case -ESHUTDOWN:
- /* urb was killed */
- spin_unlock_irqrestore(&card->slock, flags);
- dbg("%s - intr urb killed", __func__);
- return;
- default:
- /* any other status is only logged; the URB is still resubmitted below */
- info("%s - nonzero urb status %d", __func__, urb->status);
- break;
- }
-
- /* GFP_ATOMIC: submitted while holding card->slock with interrupts off */
- retval = usb_submit_urb(urb, GFP_ATOMIC);
- if (retval) {
- err("%s - failed resubmitting intr urb, error %d",
- __func__, retval);
- }
-
- spin_unlock_irqrestore(&card->slock, flags);
- wake_up_interruptible(card->chan->CPCWait_q);
-
- return;
-}
-
-#define UN_SHIFT 1
-#define CPCMSG_HEADER_LEN_FIRMWARE 11
-/*
- * Copy one message from the packed layout in 'in' to 'out'.
- *
- * The first three bytes are copied unchanged. The remaining
- * in[1] + (CPCMSG_HEADER_LEN_FIRMWARE - 3) bytes are copied with the
- * destination moved up by UN_SHIFT bytes, so out[3] is left untouched.
- *
- * Returns the number of bytes consumed from 'in'.
- */
-static inline int cpcusb_unalign_and_copy_buffy(unsigned char *out,
-						unsigned char *in)
-{
-	unsigned int head = 0;
-	unsigned int rest = 0;
-
-	/* leading three bytes keep their position */
-	while (head < 3) {
-		out[head] = in[head];
-		head++;
-	}
-
-	/* everything after them lands UN_SHIFT bytes further into 'out' */
-	while (rest < (in[1] + (CPCMSG_HEADER_LEN_FIRMWARE - 3))) {
-		out[head + UN_SHIFT + rest] = in[head + rest];
-		rest++;
-	}
-
-	return head + rest;
-}
-
-/*
- * callback for bulk IN urb
- *
- * Unpacks the messages carried by a completed bulk IN transfer into the
- * channel's message ring (chan->buf), then refills and resubmits the URB
- * and wakes anyone sleeping on the channel wait queue.
- *
- * Transfer layout as used below: a 4 byte header whose first byte holds
- * the message count in its low 7 bits, followed by the messages back to
- * back. One message occupies ibuf[start + 1] + CPCMSG_HEADER_LEN_FIRMWARE
- * bytes of the transfer buffer.
- *
- * Each message is validated BEFORE it is copied:
- *  - it must lie completely inside the transfer buffer (previously this
- *    was only checked after the copy had already read past the end), and
- *  - its payload length must fit the 'msg' union of a ring slot, because
- *    the copy would otherwise write past that slot.
- * Unpacking stops at the first message that fails either check, or when
- * the ring is full.
- */
-static void cpcusb_read_bulk_callback(struct urb *urb)
-{
-	CPC_USB_T *card = (CPC_USB_T *) urb->context;
-	CPC_CHAN_T *chan;
-	unsigned char *ibuf = urb->transfer_buffer;
-	int retval, msgCnt, start, end, len;
-	unsigned long flags;
-
-	if (!card) {
-		err("%s - device object lost", __func__);
-		return;
-	}
-
-	spin_lock_irqsave(&card->slock, flags);
-
-	if (!card->present) {
-		spin_unlock_irqrestore(&card->slock, flags);
-		info("%s - no such device", __func__);
-		return;
-	}
-
-	switch (urb->status) {
-	case 0: /* success */
-		break;
-	case -ECONNRESET:
-	case -ENOENT:
-	case -ESHUTDOWN:
-		/* urb was killed */
-		spin_unlock_irqrestore(&card->slock, flags);
-		dbg("%s - read urb killed", __func__);
-		return;
-	default:
-		info("%s - nonzero urb status %d", __func__, urb->status);
-		break;
-	}
-
-	if (urb->actual_length) {
-		/* bit 7 of the first byte is a flag this driver does not use */
-		msgCnt = ibuf[0] & ~0x80;
-
-		/* we have a 4 byte header */
-		start = 4;
-		chan = card->chan;
-		while (msgCnt) {
-			if (IsBufferFull(chan))
-				break;
-
-			/* the length byte itself must be inside the buffer */
-			end = start + 3;
-			if (end > urb->transfer_buffer_length) {
-				err("%d > %d", end, urb->transfer_buffer_length);
-				break;
-			}
-
-			/*
-			 * NOTE(review): urb->actual_length would be a tighter
-			 * limit; transfer_buffer_length is kept because it is
-			 * the bound the original check used.
-			 */
-			len = ibuf[start + 1];
-			end = start + len + CPCMSG_HEADER_LEN_FIRMWARE;
-			if (end > urb->transfer_buffer_length) {
-				err("%d > %d", end, urb->transfer_buffer_length);
-				break;
-			}
-
-			/* the payload must fit the union of one ring slot */
-			if (len > sizeof(chan->buf[0].msg)) {
-				err("%s - message payload of %d bytes is too long",
-				    __func__, len);
-				break;
-			}
-
-			start += cpcusb_unalign_and_copy_buffy(
-					(unsigned char *) &chan->buf[chan->iidx],
-					&ibuf[start]);
-
-			chan->WnR = 0;
-			chan->iidx = (chan->iidx + 1) % CPC_MSG_BUF_CNT;
-			msgCnt--;
-		}
-	}
-
-	usb_fill_bulk_urb(urb, card->udev,
-			  usb_rcvbulkpipe(card->udev, card->num_bulk_in),
-			  urb->transfer_buffer,
-			  urb->transfer_buffer_length,
-			  cpcusb_read_bulk_callback, card);
-
-	/* GFP_ATOMIC: submitted while holding card->slock with interrupts off */
-	retval = usb_submit_urb(urb, GFP_ATOMIC);
-
-	if (retval) {
-		err("%s - failed resubmitting read urb, error %d", __func__, retval);
-	}
-
-	spin_unlock_irqrestore(&card->slock, flags);
-
-	wake_up_interruptible(card->chan->CPCWait_q);
-}
-
-/*
- * callback for bulk OUT (write) urb
- *
- * Looks the completed URB up in card->wrUrbs, signals its 'finished'
- * completion and clears its busy flag so the slot can be reused, then
- * wakes sleepers on the channel wait queue unless the URB was killed.
- */
-static void cpcusb_write_bulk_callback(struct urb *urb)
-{
- CPC_USB_T *card = (CPC_USB_T *) urb->context;
- unsigned long flags;
- int j;
-
- spin_lock_irqsave(&card->slock, flags);
-
- /* find this urb */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- if (card->wrUrbs[j].urb == urb) {
- dbg("URB found no. %d", j);
- /* notify anyone waiting that the write has finished */
- complete(&card->wrUrbs[j].finished);
- atomic_set(&card->wrUrbs[j].busy, 0);
- break;
- }
- }
- /* if no entry matched, j == CPC_USB_URB_CNT here and nothing was released */
-
- switch (urb->status) {
- case 0: /* success */
- break;
- case -ECONNRESET:
- case -ENOENT:
- case -ESHUTDOWN:
- /* urb was killed */
- spin_unlock_irqrestore(&card->slock, flags);
- dbg("%s - write urb no. %d killed", __func__, j);
- return;
- default:
- /* other errors are only logged */
- info("%s - nonzero urb status %d", __func__, urb->status);
- break;
- }
-
- spin_unlock_irqrestore(&card->slock, flags);
-
- wake_up_interruptible(card->chan->CPCWait_q);
-}
-
-/*
- * Return the index of the first unused entry in CPCUSB_Table,
- * or -1 when all CPC_USB_CARD_CNT entries are taken.
- */
-static inline int cpcusb_get_free_slot(void)
-{
-	int slot = 0;
-
-	/* walk past every occupied entry */
-	while (slot < CPC_USB_CARD_CNT && CPCUSB_Table[slot])
-		slot++;
-
-	if (slot == CPC_USB_CARD_CNT)
-		return -1;
-
-	return slot;
-}
-
-/*
- * probe function for new CPC-USB devices
- *
- * Claims a free CPCUSB_Table slot, allocates the card and channel state,
- * allocates URBs and buffers for the first interrupt IN, bulk IN and
- * bulk OUT endpoints of altsetting 0, registers the character device and
- * allocates the message ring and wait queue.
- *
- * Returns 0 on success or a negative error code. On failure everything
- * this function set up is undone: the table slot is cleared, a completed
- * usb_register_dev() is reverted, and read URBs, write URBs and the
- * interrupt URB are released before the card is handed to cpcusb_delete().
- */
-static int cpcusb_probe(struct usb_interface *interface,
-			const struct usb_device_id *id)
-{
-	CPC_USB_T *card = NULL;
-	CPC_CHAN_T *chan = NULL;
-
-	struct usb_device *udev = interface_to_usbdev(interface);
-	struct usb_host_interface *iface_desc;
-	struct usb_endpoint_descriptor *endpoint;
-
-	int i, j, retval = -ENOMEM, slot;
-
-	slot = cpcusb_get_free_slot();
-	if (slot < 0) {
-		info("No more devices supported");
-		return -ENOMEM;
-	}
-
-	/* allocate memory for our device state and initialize it */
-	card = kzalloc(sizeof(CPC_USB_T), GFP_KERNEL);
-	if (!card) {
-		err("Out of memory");
-		return -ENOMEM;
-	}
-	CPCUSB_Table[slot] = card;
-	/* set early so cleanup keyed on card->idx refers to this slot */
-	card->idx = slot;
-
-	/* allocate and initialize the channel struct */
-	card->chan = kzalloc(sizeof(CPC_CHAN_T), GFP_KERNEL);
-	if (!card->chan) {
-		/* do not leave a pointer to the freed card in the table */
-		CPCUSB_Table[slot] = NULL;
-		kfree(card);
-		err("Out of memory");
-		return -ENOMEM;
-	}
-
-	chan = card->chan;
-	ResetBuffer(chan);
-
-	init_MUTEX(&card->sem);
-	spin_lock_init(&card->slock);
-
-	card->udev = udev;
-	card->interface = interface;
-	if (udev->descriptor.iSerialNumber) {
-		usb_string(udev, udev->descriptor.iSerialNumber, card->serialNumber,
-			   128);
-		info("Serial %s", card->serialNumber);
-	}
-
-	card->productId = udev->descriptor.idProduct;
-	info("Product %s",
-	     card->productId == USB_CPCUSB_LPC2119_PRODUCT_ID ?
-	     "CPC-USB/ARM7" : "CPC-USB/M16C");
-
-	/* set up the endpoint information */
-	/* check out the endpoints */
-	/* use only the first bulk-in and bulk-out endpoints */
-	iface_desc = &interface->altsetting[0];
-	for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
-		endpoint = &iface_desc->endpoint[i].desc;
-
-		if (!card->num_intr_in &&
-		    (endpoint->bEndpointAddress & USB_DIR_IN) &&
-		    ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
-		     == USB_ENDPOINT_XFER_INT)) {
-			card->intr_in_urb = usb_alloc_urb(0, GFP_KERNEL);
-			card->num_intr_in = 1;
-
-			if (!card->intr_in_urb) {
-				err("No free urbs available");
-				goto error;
-			}
-
-			dbg("intr_in urb %d", card->num_intr_in);
-		}
-
-		if (!card->num_bulk_in &&
-		    (endpoint->bEndpointAddress & USB_DIR_IN) &&
-		    ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
-		     == USB_ENDPOINT_XFER_BULK)) {
-			card->num_bulk_in = 2;
-			for (j = 0; j < CPC_USB_URB_CNT; j++) {
-				card->urbs[j].size = endpoint->wMaxPacketSize;
-				card->urbs[j].urb = usb_alloc_urb(0, GFP_KERNEL);
-				if (!card->urbs[j].urb) {
-					err("No free urbs available");
-					goto error;
-				}
-				card->urbs[j].buffer =
-				    usb_buffer_alloc(udev,
-						     card->urbs[j].size,
-						     GFP_KERNEL,
-						     &card->urbs[j].urb->transfer_dma);
-				if (!card->urbs[j].buffer) {
-					err("Couldn't allocate bulk_in_buffer");
-					goto error;
-				}
-			}
-			info("%s - %d reading URB's allocated",
-			     __func__, CPC_USB_URB_CNT);
-		}
-
-		if (!card->num_bulk_out &&
-		    !(endpoint->bEndpointAddress & USB_DIR_IN) &&
-		    ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
-		     == USB_ENDPOINT_XFER_BULK)) {
-
-			card->num_bulk_out = 2;
-
-			for (j = 0; j < CPC_USB_URB_CNT; j++) {
-				card->wrUrbs[j].size =
-				    endpoint->wMaxPacketSize;
-				card->wrUrbs[j].urb =
-				    usb_alloc_urb(0, GFP_KERNEL);
-				if (!card->wrUrbs[j].urb) {
-					err("No free urbs available");
-					goto error;
-				}
-				card->wrUrbs[j].buffer = usb_buffer_alloc(udev,
-					card->wrUrbs[j].size, GFP_KERNEL,
-					&card->wrUrbs[j].urb->transfer_dma);
-
-				if (!card->wrUrbs[j].buffer) {
-					err("Couldn't allocate bulk_out_buffer");
-					goto error;
-				}
-
-				usb_fill_bulk_urb(card->wrUrbs[j].urb, udev,
-					usb_sndbulkpipe(udev, endpoint->bEndpointAddress),
-					card->wrUrbs[j].buffer,
-					card->wrUrbs[j].size,
-					cpcusb_write_bulk_callback,
-					card);
-			}
-
-			info("%s - %d writing URB's allocated", __func__, CPC_USB_URB_CNT);
-		}
-	}
-
-	if (!(card->num_bulk_in && card->num_bulk_out)) {
-		err("Couldn't find both bulk-in and bulk-out endpoints");
-		goto error;
-	}
-
-	/* allow device read, write and ioctl */
-	card->present = 1;
-
-	/* we can register the device now, as it is ready */
-	usb_set_intfdata(interface, card);
-	retval = usb_register_dev(interface, &cpcusb_class);
-
-	if (retval) {
-		/* something prevented us from registering this driver */
-		err("Not able to get a minor for this device.");
-		usb_set_intfdata(interface, NULL);
-		goto error;
-	}
-
-	card->chan->minor = card->minor = interface->minor;
-
-	/* from here on a failure must also give the minor back */
-	chan->buf = vmalloc(sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT);
-	if (chan->buf == NULL) {
-		err("Out of memory");
-		retval = -ENOMEM;
-		goto error_deregister;
-	}
-	info("Allocated memory for %d messages (%lu kbytes)",
-	     CPC_MSG_BUF_CNT, (long unsigned int)(sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT) / 1000);
-	memset(chan->buf, 0, sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT);
-
-	ResetBuffer(chan);
-
-	card->chan->CPCWait_q = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
-	if (!card->chan->CPCWait_q) {
-		err("Out of memory");
-		retval = -ENOMEM;
-		goto error_deregister;
-	}
-	init_waitqueue_head(card->chan->CPCWait_q);
-
-	CPCUsbCnt++;
-
-	/* let the user know what node this device is now attached to */
-	info("Device now attached to USB-%d", card->minor);
-	return 0;
-
-error_deregister:
-	/* revert the registration so no minor points at a dying card */
-	card->present = 0;
-	usb_set_intfdata(interface, NULL);
-	usb_deregister_dev(interface, &cpcusb_class);
-
-error:
-	/*
-	 * Release every URB and buffer allocated above.
-	 * NOTE(review): cpcusb_disconnect() frees the write URBs and the
-	 * interrupt URB itself before calling cpcusb_delete(), so
-	 * cpcusb_delete() presumably does not free them - confirm. Pointers
-	 * are cleared after freeing so a second release cannot happen.
-	 */
-	for (j = 0; j < CPC_USB_URB_CNT; j++) {
-		if (card->urbs[j].buffer) {
-			usb_buffer_free(card->udev, card->urbs[j].size,
-					card->urbs[j].buffer,
-					card->urbs[j].urb->transfer_dma);
-			card->urbs[j].buffer = NULL;
-		}
-		if (card->urbs[j].urb) {
-			usb_free_urb(card->urbs[j].urb);
-			card->urbs[j].urb = NULL;
-		}
-
-		if (card->wrUrbs[j].buffer) {
-			usb_buffer_free(card->udev, card->wrUrbs[j].size,
-					card->wrUrbs[j].buffer,
-					card->wrUrbs[j].urb->transfer_dma);
-			card->wrUrbs[j].buffer = NULL;
-		}
-		if (card->wrUrbs[j].urb) {
-			usb_free_urb(card->wrUrbs[j].urb);
-			card->wrUrbs[j].urb = NULL;
-		}
-	}
-
-	if (card->intr_in_urb) {
-		usb_free_urb(card->intr_in_urb);
-		card->intr_in_urb = NULL;
-	}
-
-	/* the slot must not keep pointing at the card being deleted */
-	CPCUSB_Table[slot] = NULL;
-
-	cpcusb_delete(card);
-	return retval;
-}
-
-/*
- * called by the usb core when the device is removed from the system
- *
- * Marks the card as not present, kills any write URB that is still busy
- * and waits for its completion, frees the write, read and interrupt URBs
- * with their buffers and gives the minor back. The card itself is deleted
- * here only when no file handle is open; otherwise sleepers are woken and
- * the cleanup is left to cpcusb_release.
- */
-static void cpcusb_disconnect(struct usb_interface *interface)
-{
- CPC_USB_T *card = NULL;
- int minor, j;
-
- /* prevent races with open() */
- down(&disconnect_sem);
-
- /* NOTE(review): the interface data is used below without a NULL check */
- card = usb_get_intfdata(interface);
- usb_set_intfdata(interface, NULL);
-
- down(&card->sem);
-
- /* prevent device read, write and ioctl */
- card->present = 0;
-
- /* remembered now: card may already be deleted when it is logged below */
- minor = card->minor;
-
- /* free all urbs and their buffers */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- /* terminate an ongoing write */
- if (atomic_read(&card->wrUrbs[j].busy)) {
- usb_kill_urb(card->wrUrbs[j].urb);
- /* 'finished' is completed by cpcusb_write_bulk_callback() */
- wait_for_completion(&card->wrUrbs[j].finished);
- }
- usb_buffer_free(card->udev, card->wrUrbs[j].size,
- card->wrUrbs[j].buffer,
- card->wrUrbs[j].urb->transfer_dma);
- usb_free_urb(card->wrUrbs[j].urb);
- }
- info("%d write URBs freed", CPC_USB_URB_CNT);
-
- /* free all urbs and their buffers */
- /* NOTE(review): read URBs are not killed here; presumably stopped elsewhere - confirm */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- usb_buffer_free(card->udev, card->urbs[j].size,
- card->urbs[j].buffer,
- card->urbs[j].urb->transfer_dma);
- usb_free_urb(card->urbs[j].urb);
- }
- info("%d read URBs freed", CPC_USB_URB_CNT);
- usb_free_urb(card->intr_in_urb);
-
- /* give back our minor */
- usb_deregister_dev(interface, &cpcusb_class);
-
- up(&card->sem);
-
- /* if the device is opened, cpcusb_release will clean this up */
- if (!card->open)
- cpcusb_delete(card);
- else
- wake_up_interruptible(card->chan->CPCWait_q);
-
- up(&disconnect_sem);
-
- /* NOTE(review): the counter is updated after disconnect_sem is released */
- CPCUsbCnt--;
- info("USB-%d now disconnected", minor);
-}
-
-/*
- * Module entry point: clears the card table, registers the USB driver
- * and creates the proc directory with its "info" entry.
- *
- * Only a failing usb_register() makes the load fail; problems with the
- * proc entries are logged and the function still returns 0.
- */
-static int __init CPCUsb_Init(void)
-{
-	int slot = 0;
-	int result;
-
-	info(DRIVER_DESC " v" DRIVER_VERSION);
-	info("Build on " __DATE__ " at " __TIME__);
-
-	/* start with an empty card table */
-	while (slot < CPC_USB_CARD_CNT) {
-		CPCUSB_Table[slot] = 0;
-		slot++;
-	}
-
-	/* register this driver with the USB subsystem */
-	result = usb_register(&cpcusb_driver);
-	if (result) {
-		err("usb_register failed. Error number %d", result);
-		return result;
-	}
-
-	procDir = proc_mkdir(CPC_USB_PROC_DIR, NULL);
-	if (!procDir) {
-		err("Could not create proc entry");
-		return 0;
-	}
-
-	procEntry = create_proc_read_entry("info", 0444, procDir,
-					   cpcusb_proc_read_info,
-					   NULL);
-	if (procEntry)
-		return 0;
-
-	/* without the info entry the directory is removed again */
-	err("Could not create proc entry %s", CPC_USB_PROC_DIR "/info");
-	remove_proc_entry(CPC_USB_PROC_DIR, NULL);
-	procDir = NULL;
-
-	return 0;
-}
-
-/*
- * Module exit: waits until useCount has dropped to zero, deregisters the
- * USB driver and removes the proc entries created by CPCUsb_Init().
- */
-static void __exit CPCUsb_Exit(void)
-{
- /* sleeps on rmmodWq until useCount reads zero */
- wait_event(rmmodWq, !atomic_read(&useCount));
-
- /* deregister this driver with the USB subsystem */
- usb_deregister(&cpcusb_driver);
-
- /* procDir is NULL when CPCUsb_Init() could not set up the proc entries */
- if (procDir) {
- if (procEntry)
- remove_proc_entry("info", procDir);
- remove_proc_entry(CPC_USB_PROC_DIR, NULL);
- }
-}
-
-module_init(CPCUsb_Init);
-module_exit(CPCUsb_Exit);
diff --git a/drivers/staging/cpc-usb/cpc.h b/drivers/staging/cpc-usb/cpc.h
deleted file mode 100644
index b2fda5d14c1..00000000000
--- a/drivers/staging/cpc-usb/cpc.h
+++ /dev/null
@@ -1,417 +0,0 @@
-/*
- * CPC CAN Interface Definitions
- *
- * Copyright (C) 2000-2008 EMS Dr. Thomas Wuensche
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#ifndef CPC_HEADER
-#define CPC_HEADER
-
-/*
- * the maximum length of the union members within a CPC_MSG
- * this value can be defined by the customer, but has to be
- * >= 64 bytes
- * however, if not defined before, we set a length of 64 byte
- */
-#if !defined(CPC_MSG_LEN) || (CPC_MSG_LEN < 64)
-#undef CPC_MSG_LEN
-#define CPC_MSG_LEN 64
-#endif
-
-/*
- * Transmission of events from CPC interfaces to PC can be individually
- * controlled per event type. Default state is: don't transmit
- * Control values are constructed by bit-or of Subject and Action
- * and passed to CPC_Control()
- */
-
-/* Control-Values for CPC_Control() Command Subject Selection */
-#define CONTR_CAN_Message 0x04
-#define CONTR_Busload 0x08
-#define CONTR_CAN_State 0x0C
-#define CONTR_SendAck 0x10
-#define CONTR_Filter 0x14
-#define CONTR_CmdQueue 0x18 /* reserved, do not use */
-#define CONTR_BusError 0x1C
-
-/* Control Command Actions */
-#define CONTR_CONT_OFF 0
-#define CONTR_CONT_ON 1
-#define CONTR_SING_ON 2
-/*
- * CONTR_SING_ON doesn't change CONTR_CONT_ON state, so it should be
- * read as: transmit at least once
- */
-
-/* defines for confirmed request */
-#define DO_NOT_CONFIRM 0
-#define DO_CONFIRM 1
-
-/* event flags */
-#define EVENT_READ 0x01
-#define EVENT_WRITE 0x02
-
-/*
- * Messages from CPC to PC contain a message object type field.
- * The following message types are sent by CPC and can be used in
- * handlers, others should be ignored.
- */
-#define CPC_MSG_T_RESYNC 0 /* Normally to be ignored */
-#define CPC_MSG_T_CAN 1 /* CAN data frame */
-#define CPC_MSG_T_BUSLOAD 2 /* Busload message */
-#define CPC_MSG_T_STRING 3 /* Normally to be ignored */
-#define CPC_MSG_T_CONTI 4 /* Normally to be ignored */
-#define CPC_MSG_T_MEM 7 /* Normally not to be handled */
-#define CPC_MSG_T_RTR 8 /* CAN remote frame */
-#define CPC_MSG_T_TXACK 9 /* Send acknowledge */
-#define CPC_MSG_T_POWERUP 10 /* Power-up message */
-#define CPC_MSG_T_CMD_NO 11 /* Normally to be ignored */
-#define CPC_MSG_T_CAN_PRMS 12 /* Actual CAN parameters */
-#define CPC_MSG_T_ABORTED 13 /* Command aborted message */
-#define CPC_MSG_T_CANSTATE 14 /* CAN state message */
-#define CPC_MSG_T_RESET 15 /* used to reset CAN-Controller */
-#define CPC_MSG_T_XCAN 16 /* XCAN data frame */
-#define CPC_MSG_T_XRTR 17 /* XCAN remote frame */
-#define CPC_MSG_T_INFO 18 /* information strings */
-#define CPC_MSG_T_CONTROL 19 /* used for control of interface/driver behaviour */
-#define CPC_MSG_T_CONFIRM 20 /* response type for confirmed requests */
-#define CPC_MSG_T_OVERRUN 21 /* response type for overrun conditions */
-#define CPC_MSG_T_KEEPALIVE 22 /* response type for keep alive conditions */
-#define CPC_MSG_T_CANERROR 23 /* response type for bus error conditions */
-#define CPC_MSG_T_DISCONNECTED 24 /* response type for a disconnected interface */
-#define CPC_MSG_T_ERR_COUNTER 25 /* RX/TX error counter of CAN controller */
-
-#define CPC_MSG_T_FIRMWARE 100 /* response type for USB firmware download */
-
-/*
- * Messages from the PC to the CPC interface contain a command field
- * Most of the command types are wrapped by the library functions and have therefore
- * normally not to be used.
- * However, programmers who wish to circumvent the library and talk directly
- * to the drivers (mainly Linux programmers) can use the following
- * command types:
- */
-#define CPC_CMD_T_CAN 1 /* CAN data frame */
-#define CPC_CMD_T_CONTROL 3 /* used for control of interface/driver behaviour */
-#define CPC_CMD_T_CAN_PRMS 6 /* set CAN parameters */
-#define CPC_CMD_T_CLEARBUF 8 /* clears input queue; this is deprecated, use CPC_CMD_T_CLEAR_MSG_QUEUE instead */
-#define CPC_CMD_T_INQ_CAN_PARMS 11 /* inquire actual CAN parameters */
-#define CPC_CMD_T_FILTER_PRMS 12 /* set filter parameter */
-#define CPC_CMD_T_RTR 13 /* CAN remote frame */
-#define CPC_CMD_T_CANSTATE 14 /* CAN state message */
-#define CPC_CMD_T_XCAN 15 /* XCAN data frame */
-#define CPC_CMD_T_XRTR 16 /* XCAN remote frame */
-#define CPC_CMD_T_RESET 17 /* used to reset CAN-Controller */
-#define CPC_CMD_T_INQ_INFO 18 /* miscellaneous information strings */
-#define CPC_CMD_T_OPEN_CHAN 19 /* open a channel */
-#define CPC_CMD_T_CLOSE_CHAN 20 /* close a channel */
-#define CPC_CMD_T_CNTBUF 21 /* this is deprecated, use CPC_CMD_T_INQ_MSG_QUEUE_CNT instead */
-#define CPC_CMD_T_CAN_EXIT 200 /* exit the CAN (disable interrupts; reset bootrate; reset output_cntr; mode = 1) */
-
-#define CPC_CMD_T_INQ_MSG_QUEUE_CNT CPC_CMD_T_CNTBUF /* inquires the count of elements in the message queue */
-#define CPC_CMD_T_INQ_ERR_COUNTER 25 /* request the CAN controllers error counter */
-#define CPC_CMD_T_CLEAR_MSG_QUEUE CPC_CMD_T_CLEARBUF /* clear CPC_MSG queue */
-#define CPC_CMD_T_CLEAR_CMD_QUEUE 28 /* clear CPC_CMD queue */
-#define CPC_CMD_T_FIRMWARE 100 /* reserved, must not be used */
-#define CPC_CMD_T_USB_RESET 101 /* reserved, must not be used */
-#define CPC_CMD_T_WAIT_NOTIFY 102 /* reserved, must not be used */
-#define CPC_CMD_T_WAIT_SETUP 103 /* reserved, must not be used */
-#define CPC_CMD_T_ABORT 255 /* Normally not to be used */
-
-/* definitions for CPC_MSG_T_INFO information sources */
-#define CPC_INFOMSG_T_UNKNOWN_SOURCE 0
-#define CPC_INFOMSG_T_INTERFACE 1
-#define CPC_INFOMSG_T_DRIVER 2
-#define CPC_INFOMSG_T_LIBRARY 3
-
-/* information types */
-#define CPC_INFOMSG_T_UNKNOWN_TYPE 0
-#define CPC_INFOMSG_T_VERSION 1
-#define CPC_INFOMSG_T_SERIAL 2
-
-/* definitions for controller types */
-#define PCA82C200 1 /* Philips basic CAN controller, replaced by SJA1000 */
-#define SJA1000 2 /* Philips basic CAN controller */
-#define AN82527 3 /* Intel full CAN controller */
-#define M16C_BASIC 4 /* M16C controller running in basic CAN (not full CAN) mode */
-
-/* channel open error codes */
-#define CPC_ERR_NO_FREE_CHANNEL -1 /* no more free space within the channel array */
-#define CPC_ERR_CHANNEL_ALREADY_OPEN -2 /* the channel is already open */
-#define CPC_ERR_CHANNEL_NOT_ACTIVE -3 /* access to a channel not active failed */
-#define CPC_ERR_NO_DRIVER_PRESENT -4 /* no driver at the location searched by the library */
-#define CPC_ERR_NO_INIFILE_PRESENT -5 /* the library could not find the inifile */
-#define CPC_ERR_WRONG_PARAMETERS -6 /* wrong parameters in the inifile */
-#define CPC_ERR_NO_INTERFACE_PRESENT -7 /* 1. The specified interface is not connected */
- /* 2. The interface (mostly CPC-USB) was disconnected upon operation */
-#define CPC_ERR_NO_MATCHING_CHANNEL -8 /* the driver couldn't find a matching channel */
-#define CPC_ERR_NO_BUFFER_AVAILABLE -9 /* the driver couldn't allocate buffer for messages */
-#define CPC_ERR_NO_INTERRUPT -10 /* the requested interrupt couldn't be claimed */
-#define CPC_ERR_NO_MATCHING_INTERFACE -11 /* no interface type related to this channel was found */
-#define CPC_ERR_NO_RESOURCES -12 /* the requested resources could not be claimed */
-#define CPC_ERR_SOCKET -13 /* error concerning TCP sockets */
-
-/* init error codes */
-#define CPC_ERR_WRONG_CONTROLLER_TYPE -14 /* wrong CAN controller type within initialization */
-#define CPC_ERR_NO_RESET_MODE -15 /* the controller could not be set into reset mode */
-#define CPC_ERR_NO_CAN_ACCESS -16 /* the CAN controller could not be accessed */
-
-/* transmit error codes */
-#define CPC_ERR_CAN_WRONG_ID -20 /* the provided CAN id is too big */
-#define CPC_ERR_CAN_WRONG_LENGTH -21 /* the provided CAN length is too long */
-#define CPC_ERR_CAN_NO_TRANSMIT_BUF -22 /* the transmit buffer was occupied */
-#define CPC_ERR_CAN_TRANSMIT_TIMEOUT -23 /* The message could not be sent within a */
- /* specified time */
-
-/* other error codes */
-#define CPC_ERR_SERVICE_NOT_SUPPORTED -30 /* the requested service is not supported by the interface */
-#define CPC_ERR_IO_TRANSFER -31 /* a transmission error down to the driver occurred */
-#define CPC_ERR_TRANSMISSION_FAILED -32 /* a transmission error down to the interface occurred */
-#define CPC_ERR_TRANSMISSION_TIMEOUT -33 /* a timeout occurred within transmission to the interface */
-#define CPC_ERR_OP_SYS_NOT_SUPPORTED -35 /* the operating system is not supported */
-#define CPC_ERR_UNKNOWN -40 /* an unknown error occurred (mostly IOCTL errors) */
-
-#define CPC_ERR_LOADING_DLL -50 /* the library 'cpcwin.dll' could not be loaded */
-#define CPC_ERR_ASSIGNING_FUNCTION -51 /* the specified function could not be assigned */
-#define CPC_ERR_DLL_INITIALIZATION -52 /* the DLL was not initialized correctly */
-#define CPC_ERR_MISSING_LICFILE -55 /* the file containing the licenses does not exist */
-#define CPC_ERR_MISSING_LICENSE -56 /* a required license was not found */
-
-/* CAN state bit values. Ignore any bits not listed */
-#define CPC_CAN_STATE_BUSOFF 0x80
-#define CPC_CAN_STATE_ERROR 0x40
-
-/* Mask to help ignore undefined bits */
-#define CPC_CAN_STATE_MASK 0xc0
-
-/*
- * CAN message representation within a CPC_MSG (union member 'canmsg').
- * Message object type is CPC_MSG_T_CAN or CPC_MSG_T_RTR
- * or CPC_MSG_T_XCAN or CPC_MSG_T_XRTR
- */
-typedef struct CPC_CAN_MSG {
- u32 id; /* CAN identifier */
- u8 length; /* presumably the number of bytes used in msg (0..8) - confirm */
- u8 msg[8]; /* frame data, at most 8 bytes */
-} CPC_CAN_MSG_T;
-
-/* representation of the CAN parameters for the PCA82C200 controller */
-typedef struct CPC_PCA82C200_PARAMS {
- u8 acc_code; /* Acceptance-code for receive, Standard: 0 */
- u8 acc_mask; /* Acceptance-mask for receive, Standard: 0xff (everything) */
- u8 btr0; /* Bus-timing register 0 */
- u8 btr1; /* Bus-timing register 1 */
- u8 outp_contr; /* Output-control register */
-} CPC_PCA82C200_PARAMS_T;
-
-/* representation of the CAN parameters for the SJA1000 controller */
-typedef struct CPC_SJA1000_PARAMS {
- u8 mode; /* enables single or dual acceptance filtering */
- u8 acc_code0; /* Acceptance-code for receive, Standard: 0 */
- u8 acc_code1;
- u8 acc_code2;
- u8 acc_code3;
- u8 acc_mask0; /* Acceptance-mask for receive, Standard: 0xff (everything) */
- u8 acc_mask1;
- u8 acc_mask2;
- u8 acc_mask3;
- u8 btr0; /* Bus-timing register 0 */
- u8 btr1; /* Bus-timing register 1 */
- u8 outp_contr; /* Output-control register */
-} CPC_SJA1000_PARAMS_T;
-
-/*
- * representation of the CAN parameters for the M16C controller
- * in basic CAN mode (means no full CAN)
- */
-typedef struct CPC_M16C_BASIC_PARAMS {
- u8 con0;
- u8 con1;
- u8 ctlr0;
- u8 ctlr1;
- u8 clk;
- u8 acc_std_code0;
- u8 acc_std_code1;
- u8 acc_ext_code0;
- u8 acc_ext_code1;
- u8 acc_ext_code2;
- u8 acc_ext_code3;
- u8 acc_std_mask0;
- u8 acc_std_mask1;
- u8 acc_ext_mask0;
- u8 acc_ext_mask1;
- u8 acc_ext_mask2;
- u8 acc_ext_mask3;
-} CPC_M16C_BASIC_PARAMS_T;
-
-/* CAN params message representation */
-typedef struct CPC_CAN_PARAMS {
- u8 cc_type; /* represents the controller type */
- union {
- CPC_M16C_BASIC_PARAMS_T m16c_basic;
- CPC_SJA1000_PARAMS_T sja1000;
- CPC_PCA82C200_PARAMS_T pca82c200;
- } cc_params;
-} CPC_CAN_PARAMS_T;
-
-/* CHAN init params representation */
-typedef struct CPC_CHAN_PARAMS {
- int fd;
-} CPC_CHAN_PARAMS_T;
-
-/* CAN init params message representation */
-typedef struct CPC_INIT_PARAMS {
- CPC_CHAN_PARAMS_T chanparams;
- CPC_CAN_PARAMS_T canparams;
-} CPC_INIT_PARAMS_T;
-
-/* structure for confirmed message handling */
-typedef struct CPC_CONFIRM {
- u8 result; /* error code */
-} CPC_CONFIRM_T;
-
-/* structure for information requests */
-typedef struct CPC_INFO {
- u8 source; /* interface, driver or library */
- u8 type; /* version or serial number */
- char msg[CPC_MSG_LEN - 2]; /* string holding the requested information */
-} CPC_INFO_T;
-
-/*
- * OVERRUN
- * In general two types of overrun may occur:
- * a hardware overrun, where the CAN controller
- * lost a message because the interrupt was
- * not handled before the next message came in,
- * or a software overrun, where e.g. a received
- * message could not be stored in the CPC_MSG
- * buffer.
- */
-
-/* After a software overrun has occurred
- * we wait until we have CPC_OVR_GAP slots
- * free in the CPC_MSG buffer.
- */
-#define CPC_OVR_GAP 10
-
-/*
- * Two types of software overrun may occur.
- * A received CAN message or a CAN state event
- * can cause an overrun.
- * Note: A CPC_CMD which would normally store
- * its result immediately in the CPC_MSG
- * queue may fail, because the message queue is full.
- * This will not generate an overrun message, but
- * will halt command execution, until this command
- * is able to store its message in the message queue.
- */
-#define CPC_OVR_EVENT_CAN 0x01
-#define CPC_OVR_EVENT_CANSTATE 0x02
-#define CPC_OVR_EVENT_BUSERROR 0x04
-
-/*
- * If the CAN controller lost a message
- * we indicate it with the highest bit
- * set in the count field.
- */
-#define CPC_OVR_HW 0x80
-
-/* structure for overrun conditions */
-typedef struct {
- u8 event;
- u8 count;
-} CPC_OVERRUN_T;
-
-/*
- * CAN errors
- * Each CAN controller type has different
- * registers to record errors.
- * Therefore a structure containing the specific
- * errors is set up for each controller here.
- */
-
-/*
- * SJA1000 error structure
- * see the SJA1000 datasheet for detailed
- * explanation of the registers
- */
-typedef struct CPC_SJA1000_CAN_ERROR {
- u8 ecc; /* error capture code register */
- u8 rxerr; /* RX error counter register */
- u8 txerr; /* TX error counter register */
-} CPC_SJA1000_CAN_ERROR_T;
-
-/*
- * M16C error structure
- * see the M16C datasheet for detailed
- * explanation of the registers
- */
-typedef struct CPC_M16C_CAN_ERROR {
- u8 tbd; /* to be defined */
-} CPC_M16C_CAN_ERROR_T;
-
-/* structure for CAN error conditions */
-#define CPC_CAN_ECODE_ERRFRAME 0x01
-typedef struct CPC_CAN_ERROR {
- u8 ecode;
- struct {
- u8 cc_type; /* CAN controller type */
- union {
- CPC_SJA1000_CAN_ERROR_T sja1000;
- CPC_M16C_CAN_ERROR_T m16c;
- } regs;
- } cc;
-} CPC_CAN_ERROR_T;
-
-/*
- * Structure containing RX/TX error counter.
- * This structure is used to request the
- * values of the CAN controllers TX and RX
- * error counter.
- */
-typedef struct CPC_CAN_ERR_COUNTER {
- u8 rx;
- u8 tx;
-} CPC_CAN_ERR_COUNTER_T;
-
-/* If this flag is set, transmissions from PC to CPC are protected against loss */
-#define CPC_SECURE_TO_CPC 0x01
-
-/* If this flag is set, transmissions from CPC to PC are protected against loss */
-#define CPC_SECURE_TO_PC 0x02
-
-/* If this flag is set, the CAN-transmit buffer is checked to be free before sending a message */
-#define CPC_SECURE_SEND 0x04
-
-/*
- * If this flag is set, the transmission complete flag is checked
- * after sending a message
- * THIS IS CURRENTLY ONLY IMPLEMENTED IN THE PASSIVE INTERFACE DRIVERS
- */
-#define CPC_SECURE_TRANSMIT 0x08
-
-/* main message type used between library and application */
-typedef struct CPC_MSG {
- u8 type; /* type of message */
- u8 length; /* length of data within union 'msg' */
- u8 msgid; /* confirmation handle */
- u32 ts_sec; /* timestamp in seconds */
- u32 ts_nsec; /* timestamp in nano seconds */
- union {
- u8 generic[CPC_MSG_LEN];
- CPC_CAN_MSG_T canmsg;
- CPC_CAN_PARAMS_T canparams;
- CPC_CONFIRM_T confirmation;
- CPC_INFO_T info;
- CPC_OVERRUN_T overrun;
- CPC_CAN_ERROR_T error;
- CPC_CAN_ERR_COUNTER_T err_counter;
- u8 busload;
- u8 canstate;
- } msg;
-} CPC_MSG_T;
-
-#endif /* CPC_HEADER */
diff --git a/drivers/staging/cpc-usb/cpc_int.h b/drivers/staging/cpc-usb/cpc_int.h
deleted file mode 100644
index 38674e9690a..00000000000
--- a/drivers/staging/cpc-usb/cpc_int.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * CPCLIB
- *
- * Copyright (C) 2000-2008 EMS Dr. Thomas Wuensche
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- */
-#ifndef CPC_INT_H
-#define CPC_INT_H
-
-#include <linux/wait.h>
-
-#define CPC_MSG_BUF_CNT 1500
-
-#define CPC_PROC_DIR "driver/"
-
-#undef dbg
-#undef err
-#undef info
-
-/* Use our own dbg macro */
-#define dbg(format, arg...) do { if (debug) printk( KERN_INFO format "\n" , ## arg); } while (0)
-#define err(format, arg...) do { printk( KERN_INFO "ERROR " format "\n" , ## arg); } while (0)
-#define info(format, arg...) do { printk( KERN_INFO format "\n" , ## arg); } while (0)
-
-/* Macros help using of our buffers */
-#define IsBufferFull(x) (!(x)->WnR) && ((x)->iidx == (x)->oidx)
-#define IsBufferEmpty(x) ((x)->WnR) && ((x)->iidx == (x)->oidx)
-#define IsBufferNotEmpty(x) (!(x)->WnR) || ((x)->iidx != (x)->oidx)
-#define ResetBuffer(x) do { (x)->oidx = (x)->iidx=0; (x)->WnR = 1; } while(0);
-
-#define CPC_BufWriteAllowed ((chan->oidx != chan->iidx) || chan->WnR)
-
-typedef void (*chan_write_byte_t) (void *chan, unsigned int reg,
- unsigned char val);
-typedef unsigned char (*chan_read_byte_t) (void *chan, unsigned int reg);
-
-typedef struct CPC_CHAN {
- void __iomem * canBase; /* base address of SJA1000 */
- chan_read_byte_t read_byte; /* CAN controller read access routine */
- chan_write_byte_t write_byte; /* CAN controller write access routine */
- CPC_MSG_T *buf; /* buffer for CPC msg */
- unsigned int iidx;
- unsigned int oidx;
- unsigned int WnR;
- unsigned int minor;
- unsigned int locked;
- unsigned int irqDisabled;
-
- unsigned char cpcCtrlCANMessage;
- unsigned char cpcCtrlCANState;
- unsigned char cpcCtrlBUSState;
-
- unsigned char controllerType;
-
- unsigned long ovrTimeSec;
- unsigned long ovrTimeNSec;
- unsigned long ovrLockedBuffer;
- CPC_OVERRUN_T ovr;
-
- /* for debugging only */
- unsigned int handledIrqs;
- unsigned int lostMessages;
-
- unsigned int sentStdCan;
- unsigned int sentExtCan;
- unsigned int sentStdRtr;
- unsigned int sentExtRtr;
-
- unsigned int recvStdCan;
- unsigned int recvExtCan;
- unsigned int recvStdRtr;
- unsigned int recvExtRtr;
-
- wait_queue_head_t *CPCWait_q;
-
- void *private;
-} CPC_CHAN_T;
-
-#endif
diff --git a/drivers/staging/cpc-usb/cpcusb.h b/drivers/staging/cpc-usb/cpcusb.h
deleted file mode 100644
index 6bdf30be239..00000000000
--- a/drivers/staging/cpc-usb/cpcusb.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/* Header for CPC-USB Driver ********************
- * Copyright 1999, 2000, 2001
- *
- * Company: EMS Dr. Thomas Wuensche
- * Sonnenhang 3
- * 85304 Ilmmuenster
- * Phone: +49-8441-490260
- * Fax: +49-8441-81860
- * email: support@ems-wuensche.com
- * WWW: www.ems-wuensche.com
- */
-
-#ifndef CPCUSB_H
-#define CPCUSB_H
-
-#undef err
-#undef dbg
-#undef info
-
-/* Use our own dbg macro */
-#define dbg(format, arg...) do { if (debug) printk(KERN_INFO "CPC-USB: " format "\n" , ## arg); } while (0)
-#define info(format, arg...) do { printk(KERN_INFO "CPC-USB: " format "\n" , ## arg); } while (0)
-#define err(format, arg...) do { printk(KERN_INFO "CPC-USB(ERROR): " format "\n" , ## arg); } while (0)
-
-#define CPC_USB_CARD_CNT 4
-
-typedef struct CPC_USB_READ_URB {
- unsigned char *buffer; /* the buffer to send data */
- size_t size; /* the size of the send buffer */
- struct urb *urb; /* the urb used to send data */
-} CPC_USB_READ_URB_T;
-
-typedef struct CPC_USB_WRITE_URB {
- unsigned char *buffer; /* the buffer to send data */
- size_t size; /* the size of the send buffer */
- struct urb *urb; /* the urb used to send data */
- atomic_t busy; /* true if write urb is busy */
- struct completion finished; /* wait for the write to finish */
-} CPC_USB_WRITE_URB_T;
-
-#define CPC_USB_URB_CNT 10
-
-typedef struct CPC_USB {
- struct usb_device *udev; /* save off the usb device pointer */
- struct usb_interface *interface; /* the interface for this device */
- unsigned char minor; /* the starting minor number for this device */
- unsigned char num_ports; /* the number of ports this device has */
- int num_intr_in; /* number of interrupt in endpoints we have */
- int num_bulk_in; /* number of bulk in endpoints we have */
- int num_bulk_out; /* number of bulk out endpoints we have */
-
- CPC_USB_READ_URB_T urbs[CPC_USB_URB_CNT];
-
- unsigned char intr_in_buffer[4]; /* interrupt transfer buffer */
- struct urb *intr_in_urb; /* interrupt transfer urb */
-
- CPC_USB_WRITE_URB_T wrUrbs[CPC_USB_URB_CNT];
-
- int open; /* if the port is open or not */
- int present; /* if the device is not disconnected */
- struct semaphore sem; /* locks this structure */
-
- int free_slots; /* free send slots of CPC-USB */
- int idx;
-
- spinlock_t slock;
-
- char serialNumber[128]; /* serial number */
- int productId; /* product id to differ between M16C and LPC2119 */
- CPC_CHAN_T *chan;
-} CPC_USB_T;
-
-#define CPCTable CPCUSB_Table
-
-#define CPC_DRIVER_VERSION "0.724"
-#define CPC_DRIVER_SERIAL "not applicable"
-
-#define OBUF_SIZE 255 /* 4096 */
-
-/* read timeouts -- RD_NAK_TIMEOUT * RD_EXPIRE = Number of seconds */
-#define RD_NAK_TIMEOUT (10*HZ) /* Default number of X seconds to wait */
-#define RD_EXPIRE 12 /* Number of attempts to wait X seconds */
-
-#define CPC_USB_BASE_MNR 0 /* CPC-USB start at minor 0 */
-
-#endif
diff --git a/drivers/staging/cpc-usb/sja2m16c.h b/drivers/staging/cpc-usb/sja2m16c.h
deleted file mode 100644
index 654bd3fc91d..00000000000
--- a/drivers/staging/cpc-usb/sja2m16c.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _SJA2M16C_H
-#define _SJA2M16C_H
-
-#include "cpc.h"
-
-#define BAUDRATE_TOLERANCE_PERCENT 1
-#define SAMPLEPOINT_TOLERANCE_PERCENT 5
-#define SAMPLEPOINT_UPPER_LIMIT 88
-
-/* M16C parameters */
-struct FIELD_C0CONR {
- unsigned int brp:4;
- unsigned int sam:1;
- unsigned int pr:3;
- unsigned int dummy:8;
-};
-struct FIELD_C1CONR {
- unsigned int ph1:3;
- unsigned int ph2:3;
- unsigned int sjw:2;
- unsigned int dummy:8;
-};
-typedef union C0CONR {
- unsigned char c0con;
- struct FIELD_C0CONR bc0con;
-} C0CONR_T;
-typedef union C1CONR {
- unsigned char c1con;
- struct FIELD_C1CONR bc1con;
-} C1CONR_T;
-
-#define SJA_TSEG1 ((pParams->btr1 & 0x0f)+1)
-#define SJA_TSEG2 (((pParams->btr1 & 0x70)>>4)+1)
-#define SJA_BRP ((pParams->btr0 & 0x3f)+1)
-#define SJA_SJW ((pParams->btr0 & 0xc0)>>6)
-#define SJA_SAM ((pParams->btr1 & 0x80)>>7)
-int baudrate_m16c(int clk, int brp, int pr, int ph1, int ph2);
-int samplepoint_m16c(int brp, int pr, int ph1, int ph2);
-int SJA1000_TO_M16C_BASIC_Params(CPC_MSG_T *pMsg);
-
-#endif
diff --git a/drivers/staging/cpc-usb/sja2m16c_2.c b/drivers/staging/cpc-usb/sja2m16c_2.c
deleted file mode 100644
index bf0230fb778..00000000000
--- a/drivers/staging/cpc-usb/sja2m16c_2.c
+++ /dev/null
@@ -1,452 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2003,2004 by EMS Dr. Thomas Wuensche
-*
-* - All rights reserved -
-*
-* This code is provided "as is" without warranty of any kind, either
-* expressed or implied, including but not limited to the liability
-* concerning the freedom from material defects, the fitness for parti-
-* cular purposes or the freedom of proprietary rights of third parties.
-*
-*****************************************************************************
-* Module name.: cpcusb
-*****************************************************************************
-* Include file: cpc.h
-*****************************************************************************
-* Project.....: Windows Driver Development Kit
-* Filename....: sja2m16c.cpp
-* Authors.....: (GU) Gerhard Uttenthaler
-* (CS) Christian Schoett
-*****************************************************************************
-* Short descr.: converts baudrate between SJA1000 and M16C
-*****************************************************************************
-* Description.: handles the baudrate conversion from SJA1000 parameters to
-* M16C parameters
-*****************************************************************************
-* Address : EMS Dr. Thomas Wuensche
-* Sonnenhang 3
-* D-85304 Ilmmuenster
-* Tel. : +49-8441-490260
-* Fax. : +49-8441-81860
-* email: support@ems-wuensche.com
-*****************************************************************************
-* History
-*****************************************************************************
-* Version Date Auth Remark
-*
-* 01.00 ?? GU - initial release
-* 01.10 ?????????? CS - adapted to fit into the USB Windows driver
-* 02.00 18.08.2004 GU - improved the baudrate calculating algorithm
-* - implemented acceptance filtering
-* 02.10 10.09.2004 CS - adapted to fit into the USB Windows driver
-*****************************************************************************
-* ToDo's
-*****************************************************************************
-*/
-
-/****************************************************************************/
-/* I N C L U D E S
-*/
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/poll.h>
-#include <linux/smp_lock.h>
-#include <linux/completion.h>
-#include <asm/uaccess.h>
-#include <linux/usb.h>
-
-#include "cpc.h"
-#include "cpc_int.h"
-#include "cpcusb.h"
-
-#include "sja2m16c.h"
-
-/*********************************************************************/
-int baudrate_m16c(int clk, int brp, int pr, int ph1, int ph2)
-{
- return (16000000 / (1 << clk)) / 2 / (brp + 1) / (1 + pr + 1 +
- ph1 + 1 + ph2 +
- 1);
-}
-
-
-/*********************************************************************/
-int samplepoint_m16c(int brp, int pr, int ph1, int ph2)
-{
- return (100 * (1 + pr + 1 + ph1 + 1)) / (1 + pr + 1 + ph1 + 1 +
- ph2 + 1);
-}
-
-
-/****************************************************************************
-* Function.....: SJA1000_TO_M16C_BASIC_Params
-*
-* Task.........: This routine converts SJA1000 CAN btr parameters into M16C
-* parameters based on the sample point and the error. In
-* addition it converts the acceptance filter parameters to
-* suit the M16C parameters
-*
-* Parameters...: None
-*
-* Return values: None
-*
-* Comments.....:
-*****************************************************************************
-* History
-*****************************************************************************
-* 19.01.2005 CS - modifed the conversion of SJA1000 filter params into
-* M16C params. Due to compatibility reasons with the
-* older 82C200 CAN controller the SJA1000
-****************************************************************************/
-int SJA1000_TO_M16C_BASIC_Params(CPC_MSG_T * in)
-{
- int sjaBaudrate;
- int sjaSamplepoint;
- int *baudrate_error; // BRP[0..15], PR[0..7], PH1[0..7], PH2[0..7]
- int *samplepoint_error; // BRP[0..15], PR[0..7], PH1[0..7], PH2[0..7]
- int baudrate_error_merk;
- int clk, brp, pr, ph1, ph2;
- int clk_merk, brp_merk, pr_merk, ph1_merk, ph2_merk;
- int index;
- unsigned char acc_code0, acc_code1, acc_code2, acc_code3;
- unsigned char acc_mask0, acc_mask1, acc_mask2, acc_mask3;
- CPC_MSG_T * out;
- C0CONR_T c0con;
- C1CONR_T c1con;
- int tmpAccCode;
- int tmpAccMask;
-
- // we have to convert the parameters into M16C parameters
- CPC_SJA1000_PARAMS_T * pParams;
-
- // check if the type is CAN parameters and if we have to convert the given params
- if (in->type != CPC_CMD_T_CAN_PRMS
- || in->msg.canparams.cc_type != SJA1000)
- return 0;
- pParams =
- (CPC_SJA1000_PARAMS_T *) & in->msg.canparams.cc_params.sja1000;
- acc_code0 = pParams->acc_code0;
- acc_code1 = pParams->acc_code1;
- acc_code2 = pParams->acc_code2;
- acc_code3 = pParams->acc_code3;
- acc_mask0 = pParams->acc_mask0;
- acc_mask1 = pParams->acc_mask1;
- acc_mask2 = pParams->acc_mask2;
- acc_mask3 = pParams->acc_mask3;
-
-#ifdef _DEBUG_OUTPUT_CAN_PARAMS
- info("acc_code0: %2.2Xh\n", acc_code0);
- info("acc_code1: %2.2Xh\n", acc_code1);
- info("acc_code2: %2.2Xh\n", acc_code2);
- info("acc_code3: %2.2Xh\n", acc_code3);
- info("acc_mask0: %2.2Xh\n", acc_mask0);
- info("acc_mask1: %2.2Xh\n", acc_mask1);
- info("acc_mask2: %2.2Xh\n", acc_mask2);
- info("acc_mask3: %2.2Xh\n", acc_mask3);
-
-#endif /* */
- if (!
- (baudrate_error =
- (int *) vmalloc(sizeof(int) * 16 * 8 * 8 * 8 * 5))) {
- err("Could not allocate memory\n");
- return -3;
- }
- if (!
- (samplepoint_error =
- (int *) vmalloc(sizeof(int) * 16 * 8 * 8 * 8 * 5))) {
- err("Could not allocate memory\n");
- vfree(baudrate_error);
- return -3;
- }
- memset(baudrate_error, 0xff, sizeof(baudrate_error));
- memset(samplepoint_error, 0xff, sizeof(baudrate_error));
- sjaBaudrate =
- 16000000 / 2 / SJA_BRP / (1 + SJA_TSEG1 + SJA_TSEG2);
- sjaSamplepoint =
- 100 * (1 + SJA_TSEG1) / (1 + SJA_TSEG1 + SJA_TSEG2);
- if (sjaBaudrate == 0) {
- vfree(baudrate_error);
- vfree(samplepoint_error);
- return -2;
- }
-
-#ifdef _DEBUG_OUTPUT_CAN_PARAMS
- info("\nStarting SJA CAN params\n");
- info("-------------------------\n");
- info("TS1 : %2.2Xh TS2 : %2.2Xh\n", SJA_TSEG1, SJA_TSEG2);
- info("BTR0 : %2.2Xh BTR1: %2.2Xh\n", pParams->btr0,
- pParams->btr1);
- info("Baudrate: %d.%dkBaud\n", sjaBaudrate / 1000,
- sjaBaudrate % 1000);
- info("Sample P: 0.%d\n", sjaSamplepoint);
- info("\n");
-
-#endif /* */
- c0con.bc0con.sam = SJA_SAM;
- c1con.bc1con.sjw = SJA_SJW;
-
- // calculate errors for all baudrates
- index = 0;
- for (clk = 0; clk < 5; clk++) {
- for (brp = 0; brp < 16; brp++) {
- for (pr = 0; pr < 8; pr++) {
- for (ph1 = 0; ph1 < 8; ph1++) {
- for (ph2 = 0; ph2 < 8; ph2++) {
- baudrate_error[index] =
- 100 *
- abs(baudrate_m16c
- (clk, brp, pr, ph1,
- ph2) -
- sjaBaudrate) /
- sjaBaudrate;
- samplepoint_error[index] =
- abs(samplepoint_m16c
- (brp, pr, ph1,
- ph2) -
- sjaSamplepoint);
-
-#if 0
- info
- ("Baudrate : %d kBaud\n",
- baudrate_m16c(clk,
- brp, pr,
- ph1,
- ph2));
- info
- ("Baudrate Error: %d\n",
- baudrate_error
- [index]);
- info
- ("Sample P Error: %d\n",
- samplepoint_error
- [index]);
- info
- ("clk : %d\n",
- clk);
-
-#endif /* */
- index++;
- }
- }
- }
- }
- }
-
- // mark all baudrate_error entries which are outer limits
- index = 0;
- for (clk = 0; clk < 5; clk++) {
- for (brp = 0; brp < 16; brp++) {
- for (pr = 0; pr < 8; pr++) {
- for (ph1 = 0; ph1 < 8; ph1++) {
- for (ph2 = 0; ph2 < 8; ph2++) {
- if ((baudrate_error[index]
- >
- BAUDRATE_TOLERANCE_PERCENT)
- ||
- (samplepoint_error
- [index] >
- SAMPLEPOINT_TOLERANCE_PERCENT)
- ||
- (samplepoint_m16c
- (brp, pr, ph1,
- ph2) >
- SAMPLEPOINT_UPPER_LIMIT))
- {
- baudrate_error
- [index] = -1;
- } else
- if (((1 + pr + 1 +
- ph1 + 1 + ph2 +
- 1) < 8)
- ||
- ((1 + pr + 1 +
- ph1 + 1 + ph2 +
- 1) > 25)) {
- baudrate_error
- [index] = -1;
- }
-
-#if 0
- else {
- info
- ("Baudrate : %d kBaud\n",
- baudrate_m16c
- (clk, brp, pr,
- ph1, ph2));
- info
- ("Baudrate Error: %d\n",
- baudrate_error
- [index]);
- info
- ("Sample P Error: %d\n",
- samplepoint_error
- [index]);
- }
-
-#endif /* */
- index++;
- }
- }
- }
- }
- }
-
- // find list of minimum of baudrate_error within unmarked entries
- clk_merk = brp_merk = pr_merk = ph1_merk = ph2_merk = 0;
- baudrate_error_merk = 100;
- index = 0;
- for (clk = 0; clk < 5; clk++) {
- for (brp = 0; brp < 16; brp++) {
- for (pr = 0; pr < 8; pr++) {
- for (ph1 = 0; ph1 < 8; ph1++) {
- for (ph2 = 0; ph2 < 8; ph2++) {
- if (baudrate_error[index]
- != -1) {
- if (baudrate_error
- [index] <
- baudrate_error_merk)
- {
- baudrate_error_merk
- =
- baudrate_error
- [index];
- brp_merk =
- brp;
- pr_merk =
- pr;
- ph1_merk =
- ph1;
- ph2_merk =
- ph2;
- clk_merk =
- clk;
-
-#if 0
- info
- ("brp: %2.2Xh pr: %2.2Xh ph1: %2.2Xh ph2: %2.2Xh\n",
- brp,
- pr,
- ph1,
- ph2);
- info
- ("Baudrate : %d kBaud\n",
- baudrate_m16c
- (clk,
- brp,
- pr,
- ph1,
- ph2));
- info
- ("Baudrate Error: %d\n",
- baudrate_error
- [index]);
- info
- ("Sample P Error: %d\n",
- samplepoint_error
- [index]);
-
-#endif /* */
- }
- }
- index++;
- }
- }
- }
- }
- }
- if (baudrate_error_merk == 100) {
- info("ERROR: Could not convert CAN init parameter\n");
- vfree(baudrate_error);
- vfree(samplepoint_error);
- return -1;
- }
-
- // setting m16c CAN parameter
- c0con.bc0con.brp = brp_merk;
- c0con.bc0con.pr = pr_merk;
- c1con.bc1con.ph1 = ph1_merk;
- c1con.bc1con.ph2 = ph2_merk;
-
-#ifdef _DEBUG_OUTPUT_CAN_PARAMS
- info("\nResulting M16C CAN params\n");
- info("-------------------------\n");
- info("clk : %2.2Xh\n", clk_merk);
- info("ph1 : %2.2Xh ph2: %2.2Xh\n", c1con.bc1con.ph1 + 1,
- c1con.bc1con.ph2 + 1);
- info("pr : %2.2Xh brp: %2.2Xh\n", c0con.bc0con.pr + 1,
- c0con.bc0con.brp + 1);
- info("sjw : %2.2Xh sam: %2.2Xh\n", c1con.bc1con.sjw,
- c0con.bc0con.sam);
- info("co1 : %2.2Xh co0: %2.2Xh\n", c1con.c1con, c0con.c0con);
- info("Baudrate: %d.%dBaud\n",
- baudrate_m16c(clk_merk, c0con.bc0con.brp, c0con.bc0con.pr,
- c1con.bc1con.ph1, c1con.bc1con.ph2) / 1000,
- baudrate_m16c(clk_merk, c0con.bc0con.brp, c0con.bc0con.pr,
- c1con.bc1con.ph1, c1con.bc1con.ph2) % 1000);
- info("Sample P: 0.%d\n",
- samplepoint_m16c(c0con.bc0con.brp, c0con.bc0con.pr,
- c1con.bc1con.ph1, c1con.bc1con.ph2));
- info("\n");
-
-#endif /* */
- out = in;
- out->type = 6;
- out->length = sizeof(CPC_M16C_BASIC_PARAMS_T) + 1;
- out->msg.canparams.cc_type = M16C_BASIC;
- out->msg.canparams.cc_params.m16c_basic.con0 = c0con.c0con;
- out->msg.canparams.cc_params.m16c_basic.con1 = c1con.c1con;
- out->msg.canparams.cc_params.m16c_basic.ctlr0 = 0x4C;
- out->msg.canparams.cc_params.m16c_basic.ctlr1 = 0x00;
- out->msg.canparams.cc_params.m16c_basic.clk = clk_merk;
- out->msg.canparams.cc_params.m16c_basic.acc_std_code0 =
- acc_code0;
- out->msg.canparams.cc_params.m16c_basic.acc_std_code1 = acc_code1;
-
-// info("code0: 0x%2.2X, code1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_code0, out->msg.canparams.cc_params.m16c_basic.acc_std_code1);
- tmpAccCode = (acc_code1 >> 5) + (acc_code0 << 3);
- out->msg.canparams.cc_params.m16c_basic.acc_std_code0 =
- (unsigned char) tmpAccCode;
- out->msg.canparams.cc_params.m16c_basic.acc_std_code1 =
- (unsigned char) (tmpAccCode >> 8);
-
-// info("code0: 0x%2.2X, code1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_code0, out->msg.canparams.cc_params.m16c_basic.acc_std_code1);
- out->msg.canparams.cc_params.m16c_basic.acc_std_mask0 =
- ~acc_mask0;
- out->msg.canparams.cc_params.m16c_basic.acc_std_mask1 =
- ~acc_mask1;
-
-// info("mask0: 0x%2.2X, mask1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_mask0, out->msg.canparams.cc_params.m16c_basic.acc_std_mask1);
- tmpAccMask = ((acc_mask1) >> 5) + ((acc_mask0) << 3);
-
-// info("tmpAccMask: 0x%4.4X\n", tmpAccMask);
- out->msg.canparams.cc_params.m16c_basic.acc_std_mask0 =
- (unsigned char) ~tmpAccMask;
- out->msg.canparams.cc_params.m16c_basic.acc_std_mask1 =
- (unsigned char) ~(tmpAccMask >> 8);
-
-// info("mask0: 0x%2.2X, mask1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_mask0, out->msg.canparams.cc_params.m16c_basic.acc_std_mask1);
- out->msg.canparams.cc_params.m16c_basic.acc_ext_code0 =
- (unsigned char) tmpAccCode;
- out->msg.canparams.cc_params.m16c_basic.acc_ext_code1 =
- (unsigned char) (tmpAccCode >> 8);
- out->msg.canparams.cc_params.m16c_basic.acc_ext_code2 = acc_code2;
- out->msg.canparams.cc_params.m16c_basic.acc_ext_code3 = acc_code3;
- out->msg.canparams.cc_params.m16c_basic.acc_ext_mask0 =
- (unsigned char) ~tmpAccMask;
- out->msg.canparams.cc_params.m16c_basic.acc_ext_mask1 =
- (unsigned char) ~(tmpAccMask >> 8);
- out->msg.canparams.cc_params.m16c_basic.acc_ext_mask2 =
- ~acc_mask2;
- out->msg.canparams.cc_params.m16c_basic.acc_ext_mask3 =
- ~acc_mask3;
- vfree(baudrate_error);
- vfree(samplepoint_error);
- return 0;
-}
-
-
diff --git a/drivers/staging/go7007/Makefile b/drivers/staging/go7007/Makefile
index d14ea84a01f..1301caa7495 100644
--- a/drivers/staging/go7007/Makefile
+++ b/drivers/staging/go7007/Makefile
@@ -32,8 +32,3 @@ endif
EXTRA_CFLAGS += -Idrivers/media/dvb/frontends
EXTRA_CFLAGS += -Idrivers/media/dvb/dvb-core
-
-# Ubuntu 8.04 has CONFIG_SND undefined, so include lum sound/config.h too
-ifeq ($(CONFIG_SND),)
-EXTRA_CFLAGS += -include sound/config.h
-endif
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index ebd7237230e..240750881d2 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -22,7 +22,6 @@ config USB_ARCH_HAS_HCD
default y if PCMCIA && !M32R # sl811_cs
default y if ARM # SL-811
default y if SUPERH # r8a66597-hcd
- default y if MICROBLAZE
default PCI
# many non-PCI SOC chips embed OHCI
diff --git a/drivers/usb/gadget/f_loopback.c b/drivers/usb/gadget/f_loopback.c
index eb6ddfc2085..6cb29d3df57 100644
--- a/drivers/usb/gadget/f_loopback.c
+++ b/drivers/usb/gadget/f_loopback.c
@@ -22,7 +22,6 @@
/* #define VERBOSE_DEBUG */
#include <linux/kernel.h>
-#include <linux/utsname.h>
#include <linux/device.h>
#include "g_zero.h"
diff --git a/drivers/usb/gadget/f_obex.c b/drivers/usb/gadget/f_obex.c
index 46d6266f30e..b4a3ba654ea 100644
--- a/drivers/usb/gadget/f_obex.c
+++ b/drivers/usb/gadget/f_obex.c
@@ -24,7 +24,6 @@
/* #define VERBOSE_DEBUG */
#include <linux/kernel.h>
-#include <linux/utsname.h>
#include <linux/device.h>
#include "u_serial.h"
diff --git a/drivers/usb/gadget/f_sourcesink.c b/drivers/usb/gadget/f_sourcesink.c
index bffe91d525f..09cba273d2d 100644
--- a/drivers/usb/gadget/f_sourcesink.c
+++ b/drivers/usb/gadget/f_sourcesink.c
@@ -22,7 +22,6 @@
/* #define VERBOSE_DEBUG */
#include <linux/kernel.h>
-#include <linux/utsname.h>
#include <linux/device.h>
#include "g_zero.h"
diff --git a/drivers/usb/gadget/u_audio.c b/drivers/usb/gadget/u_audio.c
index b5200d55145..8252595d619 100644
--- a/drivers/usb/gadget/u_audio.c
+++ b/drivers/usb/gadget/u_audio.c
@@ -10,7 +10,6 @@
*/
#include <linux/kernel.h>
-#include <linux/utsname.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/ctype.h>
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index f8751ff863c..2fc02bd9584 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -23,7 +23,6 @@
/* #define VERBOSE_DEBUG */
#include <linux/kernel.h>
-#include <linux/utsname.h>
#include <linux/device.h>
#include <linux/ctype.h>
#include <linux/etherdevice.h>
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 68fa0e43b78..8c075b2416b 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -912,6 +912,7 @@ static void sierra_release(struct usb_serial *serial)
}
}
+#ifdef CONFIG_PM
static void stop_read_write_urbs(struct usb_serial *serial)
{
int i, j;
@@ -988,6 +989,10 @@ static int sierra_resume(struct usb_serial *serial)
return ec ? -EIO : 0;
}
+#else
+#define sierra_suspend NULL
+#define sierra_resume NULL
+#endif
static struct usb_serial_driver sierra_device = {
.driver = {
diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c
index ba3d71f5c7d..9554ad5f9af 100644
--- a/drivers/vlynq/vlynq.c
+++ b/drivers/vlynq/vlynq.c
@@ -702,7 +702,7 @@ static int vlynq_probe(struct platform_device *pdev)
dev->mem_start = mem_res->start;
dev->mem_end = mem_res->end;
- len = regs_res->end - regs_res->start;
+ len = resource_size(regs_res);
if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) {
printk(KERN_ERR "%s: Can't request vlynq registers\n",
dev_name(&dev->dev));
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 74e0723e90b..795233702a4 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -8,3 +8,12 @@ config 9P_FS
See <http://v9fs.sf.net> for more information.
If unsure, say N.
+
+config 9P_FSCACHE
+ bool "Enable 9P client caching support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y
+ help
+ Choose Y here to enable persistent, read-only local
+ caching support for 9p clients using FS-Cache.
+
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index bc7f0d1551e..1a940ec7af6 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -8,5 +8,6 @@ obj-$(CONFIG_9P_FS) := 9p.o
vfs_dir.o \
vfs_dentry.o \
v9fs.o \
- fid.o \
+ fid.o
+9p-$(CONFIG_9P_FSCACHE) += cache.o
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
new file mode 100644
index 00000000000..51c94e26a34
--- /dev/null
+++ b/fs/9p/cache.c
@@ -0,0 +1,474 @@
+/*
+ * V9FS cache definitions.
+ *
+ * Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/jiffies.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <net/9p/9p.h>
+
+#include "v9fs.h"
+#include "cache.h"
+
+#define CACHETAG_LEN 11
+
+struct kmem_cache *vcookie_cache;
+
+struct fscache_netfs v9fs_cache_netfs = {
+ .name = "9p",
+ .version = 0,
+};
+
+static void init_once(void *foo)
+{
+ struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo;
+ vcookie->fscache = NULL;
+ vcookie->qid = NULL;
+ inode_init_once(&vcookie->inode);
+}
+
+/**
+ * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain
+ * vcookie to inode mapping
+ *
+ * Returns 0 on success, -ENOMEM if the slab cache cannot be created.
+ */
+
+static int v9fs_init_vcookiecache(void)
+{
+ vcookie_cache = kmem_cache_create("vcookie_cache",
+ sizeof(struct v9fs_cookie),
+ 0, (SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD),
+ init_once);
+ if (!vcookie_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * v9fs_destroy_vcookiecache - destroy the cache of vcookies
+ *
+ */
+
+static void v9fs_destroy_vcookiecache(void)
+{
+ kmem_cache_destroy(vcookie_cache);
+}
+
+int __v9fs_cache_register(void)
+{
+ int ret;
+ ret = v9fs_init_vcookiecache();
+ if (ret < 0)
+ return ret;
+
+ return fscache_register_netfs(&v9fs_cache_netfs);
+}
+
+void __v9fs_cache_unregister(void)
+{
+ v9fs_destroy_vcookiecache();
+ fscache_unregister_netfs(&v9fs_cache_netfs);
+}
+
+/**
+ * v9fs_random_cachetag - Generate a random tag to be associated
+ * with a new cache session.
+ *
+ * The value of jiffies is used as a fairly random cache tag.
+ */
+
+static
+int v9fs_random_cachetag(struct v9fs_session_info *v9ses)
+{
+ v9ses->cachetag = kmalloc(CACHETAG_LEN, GFP_KERNEL);
+ if (!v9ses->cachetag)
+ return -ENOMEM;
+
+ return scnprintf(v9ses->cachetag, CACHETAG_LEN, "%lu", jiffies);
+}
+
+static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ struct v9fs_session_info *v9ses;
+ uint16_t klen = 0;
+
+ v9ses = (struct v9fs_session_info *)cookie_netfs_data;
+ P9_DPRINTK(P9_DEBUG_FSC, "session %p buf %p size %u", v9ses,
+ buffer, bufmax);
+
+ if (v9ses->cachetag)
+ klen = strlen(v9ses->cachetag);
+
+ if (klen > bufmax)
+ return 0;
+
+ memcpy(buffer, v9ses->cachetag, klen);
+ P9_DPRINTK(P9_DEBUG_FSC, "cache session tag %s", v9ses->cachetag);
+ return klen;
+}
+
+const struct fscache_cookie_def v9fs_cache_session_index_def = {
+ .name = "9P.session",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = v9fs_cache_session_get_key,
+};
+
+void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
+{
+ /* If no cache session tag was specified, we generate a random one. */
+ if (!v9ses->cachetag)
+ v9fs_random_cachetag(v9ses);
+
+ v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index,
+ &v9fs_cache_session_index_def,
+ v9ses);
+ P9_DPRINTK(P9_DEBUG_FSC, "session %p get cookie %p", v9ses,
+ v9ses->fscache);
+}
+
+void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
+{
+ P9_DPRINTK(P9_DEBUG_FSC, "session %p put cookie %p", v9ses,
+ v9ses->fscache);
+ fscache_relinquish_cookie(v9ses->fscache, 0);
+ v9ses->fscache = NULL;
+}
+
+
+static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ const struct v9fs_cookie *vcookie = cookie_netfs_data;
+ memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path));
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode,
+ vcookie->qid->path);
+ return sizeof(vcookie->qid->path);
+}
+
+static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
+ uint64_t *size)
+{
+ const struct v9fs_cookie *vcookie = cookie_netfs_data;
+ *size = i_size_read(&vcookie->inode);
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode,
+ *size);
+}
+
+static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t buflen)
+{
+ const struct v9fs_cookie *vcookie = cookie_netfs_data;
+ memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version));
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode,
+ vcookie->qid->version);
+ return sizeof(vcookie->qid->version);
+}
+
+static enum
+fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
+ const void *buffer,
+ uint16_t buflen)
+{
+ const struct v9fs_cookie *vcookie = cookie_netfs_data;
+
+ if (buflen != sizeof(vcookie->qid->version))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ if (memcmp(buffer, &vcookie->qid->version,
+ sizeof(vcookie->qid->version)))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
+{
+ struct v9fs_cookie *vcookie = cookie_netfs_data;
+ struct pagevec pvec;
+ pgoff_t first;
+ int loop, nr_pages;
+
+ pagevec_init(&pvec, 0);
+ first = 0;
+
+ for (;;) {
+ nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping,
+ first,
+ PAGEVEC_SIZE - pagevec_count(&pvec));
+ if (!nr_pages)
+ break;
+
+ for (loop = 0; loop < nr_pages; loop++)
+ ClearPageFsCache(pvec.pages[loop]);
+
+ first = pvec.pages[nr_pages - 1]->index + 1;
+
+ pvec.nr = nr_pages;
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+}
+
+const struct fscache_cookie_def v9fs_cache_inode_index_def = {
+ .name = "9p.inode",
+ .type = FSCACHE_COOKIE_TYPE_DATAFILE,
+ .get_key = v9fs_cache_inode_get_key,
+ .get_attr = v9fs_cache_inode_get_attr,
+ .get_aux = v9fs_cache_inode_get_aux,
+ .check_aux = v9fs_cache_inode_check_aux,
+ .now_uncached = v9fs_cache_inode_now_uncached,
+};
+
+void v9fs_cache_inode_get_cookie(struct inode *inode)
+{
+ struct v9fs_cookie *vcookie;
+ struct v9fs_session_info *v9ses;
+
+ if (!S_ISREG(inode->i_mode))
+ return;
+
+ vcookie = v9fs_inode2cookie(inode);
+ if (vcookie->fscache)
+ return;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+ &v9fs_cache_inode_index_def,
+ vcookie);
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode,
+ vcookie->fscache);
+}
+
+void v9fs_cache_inode_put_cookie(struct inode *inode)
+{
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+ if (!vcookie->fscache)
+ return;
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode,
+ vcookie->fscache);
+
+ fscache_relinquish_cookie(vcookie->fscache, 0);
+ vcookie->fscache = NULL;
+}
+
+void v9fs_cache_inode_flush_cookie(struct inode *inode)
+{
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+ if (!vcookie->fscache)
+ return;
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode,
+ vcookie->fscache);
+
+ fscache_relinquish_cookie(vcookie->fscache, 1);
+ vcookie->fscache = NULL;
+}
+
+/**
+ * v9fs_cache_inode_set_cookie - adjust an inode's cache cookie on open
+ * @inode: inode being opened
+ * @filp: file being opened; only its access mode (f_flags) is consulted
+ *
+ * Does nothing if the inode currently has no FS-Cache cookie.  Otherwise,
+ * with the vcookie lock held, an open for anything other than read-only
+ * access flushes the cookie, while a read-only open goes through
+ * v9fs_cache_inode_get_cookie().
+ *
+ * NOTE(review): the helpers are called with a spinlock held; confirm that
+ * fscache_relinquish_cookie()/fscache_acquire_cookie() cannot sleep.
+ */
+void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
+{
+	struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+	if (!vcookie->fscache)
+		return;
+
+	spin_lock(&vcookie->lock);
+	if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
+		v9fs_cache_inode_flush_cookie(inode);
+	else
+		v9fs_cache_inode_get_cookie(inode);
+
+	spin_unlock(&vcookie->lock);
+}
+
+/*
+ * Replace the inode's FS-Cache cookie with a freshly acquired one.
+ * The current cookie is relinquished (second argument 1) and a new one
+ * is acquired from the session cookie.  Does nothing if the inode has
+ * no cookie.
+ */
+void v9fs_cache_inode_reset_cookie(struct inode *inode)
+{
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_session_info *v9ses;
+ struct fscache_cookie *old;
+
+ if (!vcookie->fscache)
+ return;
+
+ /*
+ * "old" is only used for the debug message below.
+ * NOTE(review): both the NULL check above and this snapshot happen
+ * before the lock is taken, so they can race with another updater.
+ */
+ old = vcookie->fscache;
+
+ /*
+ * NOTE(review): relinquish/acquire run under a spinlock; confirm
+ * neither call can sleep.
+ */
+ spin_lock(&vcookie->lock);
+ fscache_relinquish_cookie(vcookie->fscache, 1);
+
+ v9ses = v9fs_inode2v9ses(inode);
+ vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+ &v9fs_cache_inode_index_def,
+ vcookie);
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p",
+ inode, old, vcookie->fscache);
+
+ spin_unlock(&vcookie->lock);
+}
+
+/*
+ * Decide whether a page may be released.  Returns 1 when the page is
+ * not (or no longer) marked PageFsCache, and 0 when a cache write is
+ * still pending on it and the caller's gfp mask does not permit
+ * waiting.  The inode must have a cookie.
+ */
+int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+	struct v9fs_cookie *vc = v9fs_inode2cookie(page->mapping->host);
+
+	BUG_ON(!vc->fscache);
+
+	if (!PageFsCache(page))
+		return 1;
+
+	if (fscache_check_page_write(vc->fscache, page)) {
+		/* A write to the cache is in flight: wait only if allowed. */
+		if (!(gfp & __GFP_WAIT))
+			return 0;
+		fscache_wait_on_page_write(vc->fscache, page);
+	}
+
+	fscache_uncache_page(vc->fscache, page);
+	ClearPageFsCache(page);
+	return 1;
+}
+
+/*
+ * Detach a page from the cache on invalidation.  If the page is marked
+ * PageFsCache, wait for any pending cache write, then uncache it and
+ * clear the mark.  The page must be locked and the inode must have a
+ * cookie.
+ */
+void __v9fs_fscache_invalidate_page(struct page *page)
+{
+	struct v9fs_cookie *vc = v9fs_inode2cookie(page->mapping->host);
+
+	BUG_ON(!vc->fscache);
+
+	if (!PageFsCache(page))
+		return;
+
+	fscache_wait_on_page_write(vc->fscache, page);
+	BUG_ON(!PageLocked(page));
+	fscache_uncache_page(vc->fscache, page);
+	ClearPageFsCache(page);
+}
+
+/*
+ * Completion callback handed to the fscache read calls: mark the page
+ * up to date when the read succeeded, and unlock it in every case.
+ */
+static void v9fs_vfs_readpage_complete(struct page *page, void *data,
+				       int error)
+{
+	if (error == 0)
+		SetPageUptodate(page);
+
+	unlock_page(page);
+}
+
+/**
+ * __v9fs_readpage_from_fscache - read a page from cache
+ *
+ * Returns 0 if the page is in the cache and a BIO was submitted,
+ * 1 if fscache reported -ENOBUFS or -ENODATA (page not in cache),
+ * -ENOBUFS if the inode has no cookie, and any other fscache result
+ * unchanged.
+ */
+
+int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+	const struct v9fs_cookie *vc = v9fs_inode2cookie(inode);
+	int rc;
+
+	P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+	if (!vc->fscache)
+		return -ENOBUFS;
+
+	rc = fscache_read_or_alloc_page(vc->fscache,
+					page,
+					v9fs_vfs_readpage_complete,
+					NULL,
+					GFP_KERNEL);
+
+	if (rc == -ENOBUFS || rc == -ENODATA) {
+		P9_DPRINTK(P9_DEBUG_FSC, "page/inode not in cache %d", rc);
+		return 1;
+	}
+
+	if (rc == 0) {
+		P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted");
+	} else {
+		P9_DPRINTK(P9_DEBUG_FSC, "ret %d", rc);
+	}
+	return rc;
+}
+
+/**
+ * __v9fs_readpages_from_fscache - read multiple pages from cache
+ *
+ * Returns 0 if the pages are in the cache and a BIO was submitted (the
+ * page list must then be empty and *nr_pages zero), 1 if fscache
+ * reported -ENOBUFS or -ENODATA, -ENOBUFS if the inode has no cookie,
+ * and any other fscache result unchanged.
+ */
+
+int __v9fs_readpages_from_fscache(struct inode *inode,
+				  struct address_space *mapping,
+				  struct list_head *pages,
+				  unsigned *nr_pages)
+{
+	const struct v9fs_cookie *vc = v9fs_inode2cookie(inode);
+	int rc;
+
+	P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages);
+	if (!vc->fscache)
+		return -ENOBUFS;
+
+	rc = fscache_read_or_alloc_pages(vc->fscache,
+					 mapping, pages, nr_pages,
+					 v9fs_vfs_readpage_complete,
+					 NULL,
+					 mapping_gfp_mask(mapping));
+
+	if (rc == -ENOBUFS || rc == -ENODATA) {
+		P9_DPRINTK(P9_DEBUG_FSC, "pages/inodes not in cache %d", rc);
+		return 1;
+	}
+
+	if (rc == 0) {
+		/* Every page must have been consumed by the cache read. */
+		BUG_ON(!list_empty(pages));
+		BUG_ON(*nr_pages != 0);
+		P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted");
+		return rc;
+	}
+
+	P9_DPRINTK(P9_DEBUG_FSC, "ret %d", rc);
+	return rc;
+}
+
+/**
+ * __v9fs_readpage_to_fscache - write a page to the cache
+ *
+ * If fscache_write_page() reports a failure the page is uncached again
+ * through v9fs_uncache_page().
+ */
+
+void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
+{
+	const struct v9fs_cookie *vc = v9fs_inode2cookie(inode);
+	int rc;
+
+	P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+	rc = fscache_write_page(vc->fscache, page, GFP_KERNEL);
+	P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", rc);
+	if (rc)
+		v9fs_uncache_page(inode, page);
+}
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
new file mode 100644
index 00000000000..a94192bfaee
--- /dev/null
+++ b/fs/9p/cache.h
@@ -0,0 +1,176 @@
+/*
+ * V9FS cache definitions.
+ *
+ * Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/* Include guard: the macro must be defined here for the guard to work. */
+#ifndef _9P_CACHE_H
+#define _9P_CACHE_H
+#ifdef CONFIG_9P_FSCACHE
+#include <linux/fscache.h>
+#include <linux/spinlock.h>
+
+extern struct kmem_cache *vcookie_cache;
+
+/*
+ * Per-inode caching state.  The VFS inode is embedded in this structure,
+ * so v9fs_inode2cookie() can get from an inode back to its v9fs_cookie
+ * with container_of().
+ */
+struct v9fs_cookie {
+ spinlock_t lock; /* taken around updates of fscache and qid */
+ struct inode inode; /* embedded VFS inode */
+ struct fscache_cookie *fscache; /* FS-Cache cookie, or NULL if none */
+ struct p9_qid *qid; /* pointer stored by v9fs_vcookie_set_qid() */
+};
+
+/*
+ * Map an inode to the v9fs_cookie that embeds it.  Only valid for inodes
+ * that were allocated as part of a struct v9fs_cookie.
+ */
+static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode)
+{
+ return container_of(inode, struct v9fs_cookie, inode);
+}
+
+extern struct fscache_netfs v9fs_cache_netfs;
+extern const struct fscache_cookie_def v9fs_cache_session_index_def;
+extern const struct fscache_cookie_def v9fs_cache_inode_index_def;
+
+extern void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses);
+extern void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses);
+
+extern void v9fs_cache_inode_get_cookie(struct inode *inode);
+extern void v9fs_cache_inode_put_cookie(struct inode *inode);
+extern void v9fs_cache_inode_flush_cookie(struct inode *inode);
+extern void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp);
+extern void v9fs_cache_inode_reset_cookie(struct inode *inode);
+
+extern int __v9fs_cache_register(void);
+extern void __v9fs_cache_unregister(void);
+
+extern int __v9fs_fscache_release_page(struct page *page, gfp_t gfp);
+extern void __v9fs_fscache_invalidate_page(struct page *page);
+extern int __v9fs_readpage_from_fscache(struct inode *inode,
+ struct page *page);
+extern int __v9fs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages);
+extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page);
+
+
+/**
+ * v9fs_cache_register - Register v9fs file system with the cache
+ *
+ * Returns the result of __v9fs_cache_register() unchanged.
+ */
+static inline int v9fs_cache_register(void)
+{
+ return __v9fs_cache_register();
+}
+
+/**
+ * v9fs_cache_unregister - Unregister v9fs from the cache
+ *
+ * Inline wrapper around __v9fs_cache_unregister().
+ */
+static inline void v9fs_cache_unregister(void)
+{
+ __v9fs_cache_unregister();
+}
+
+/* Inline wrapper: returns the result of __v9fs_fscache_release_page(). */
+static inline int v9fs_fscache_release_page(struct page *page,
+ gfp_t gfp)
+{
+ return __v9fs_fscache_release_page(page, gfp);
+}
+
+/* Inline wrapper around __v9fs_fscache_invalidate_page(). */
+static inline void v9fs_fscache_invalidate_page(struct page *page)
+{
+ __v9fs_fscache_invalidate_page(page);
+}
+
+/* Inline wrapper: returns the result of __v9fs_readpage_from_fscache(). */
+static inline int v9fs_readpage_from_fscache(struct inode *inode,
+ struct page *page)
+{
+ return __v9fs_readpage_from_fscache(inode, page);
+}
+
+/* Inline wrapper: returns the result of __v9fs_readpages_from_fscache(). */
+static inline int v9fs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ return __v9fs_readpages_from_fscache(inode, mapping, pages,
+ nr_pages);
+}
+
+/*
+ * Write a page to the cache, but only if the page is marked PageFsCache;
+ * pages without that mark are ignored.
+ */
+static inline void v9fs_readpage_to_fscache(struct inode *inode,
+ struct page *page)
+{
+ if (PageFsCache(page))
+ __v9fs_readpage_to_fscache(inode, page);
+}
+
+/*
+ * Tell FS-Cache to drop its interest in a page of this inode.  After the
+ * call the page is required to no longer be marked PageFsCache.
+ */
+static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
+{
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ fscache_uncache_page(vcookie->fscache, page);
+ BUG_ON(PageFsCache(page));
+}
+
+/*
+ * Record the qid associated with an inode, under the vcookie lock.
+ *
+ * Only the pointer is stored, not a copy of the qid, so the object that
+ * @qid points to must stay valid for as long as vcookie->qid may be used.
+ * NOTE(review): v9fs_inode_from_fid() passes &st->qid and frees st right
+ * afterwards; verify that the stored pointer is not dereferenced later.
+ */
+static inline void v9fs_vcookie_set_qid(struct inode *inode,
+ struct p9_qid *qid)
+{
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ spin_lock(&vcookie->lock);
+ vcookie->qid = qid;
+ spin_unlock(&vcookie->lock);
+}
+
+#else /* CONFIG_9P_FSCACHE */
+
+/*
+ * Stubs used when CONFIG_9P_FSCACHE is not set, so that callers need no
+ * #ifdefs of their own.  They perform no caching work.
+ */
+
+/* Returns 1; init_v9fs() only treats negative values as failure. */
+static inline int v9fs_cache_register(void)
+{
+ return 1;
+}
+
+static inline void v9fs_cache_unregister(void) {}
+
+/* Without a cache the page can always be released: returns 1. */
+static inline int v9fs_fscache_release_page(struct page *page,
+ gfp_t gfp) {
+ return 1;
+}
+
+static inline void v9fs_fscache_invalidate_page(struct page *page) {}
+
+/* Always -ENOBUFS (non-zero), so the caller falls back to a 9P read. */
+static inline int v9fs_readpage_from_fscache(struct inode *inode,
+ struct page *page)
+{
+ return -ENOBUFS;
+}
+
+/* Always -ENOBUFS (non-zero), so the caller falls back to a 9P read. */
+static inline int v9fs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ return -ENOBUFS;
+}
+
+static inline void v9fs_readpage_to_fscache(struct inode *inode,
+ struct page *page)
+{}
+
+static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
+{}
+
+static inline void v9fs_vcookie_set_qid(struct inode *inode,
+ struct p9_qid *qid)
+{}
+
+#endif /* CONFIG_9P_FSCACHE */
+#endif /* _9P_CACHE_H */
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index f7003cfac63..cf62b05e296 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -34,21 +34,25 @@
#include <net/9p/transport.h>
#include "v9fs.h"
#include "v9fs_vfs.h"
+#include "cache.h"
+
+static DEFINE_SPINLOCK(v9fs_sessionlist_lock);
+static LIST_HEAD(v9fs_sessionlist);
/*
- * Option Parsing (code inspired by NFS code)
- * NOTE: each transport will parse its own options
- */
+ * Option Parsing (code inspired by NFS code)
+ * NOTE: each transport will parse its own options
+ */
enum {
/* Options that take integer arguments */
Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid,
/* String options */
- Opt_uname, Opt_remotename, Opt_trans,
+ Opt_uname, Opt_remotename, Opt_trans, Opt_cache, Opt_cachetag,
/* Options that take no arguments */
Opt_nodevmap,
/* Cache options */
- Opt_cache_loose,
+ Opt_cache_loose, Opt_fscache,
/* Access options */
Opt_access,
/* Error token */
@@ -63,8 +67,10 @@ static const match_table_t tokens = {
{Opt_uname, "uname=%s"},
{Opt_remotename, "aname=%s"},
{Opt_nodevmap, "nodevmap"},
- {Opt_cache_loose, "cache=loose"},
+ {Opt_cache, "cache=%s"},
{Opt_cache_loose, "loose"},
+ {Opt_fscache, "fscache"},
+ {Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
{Opt_err, NULL}
};
@@ -89,16 +95,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
v9ses->afid = ~0;
v9ses->debug = 0;
v9ses->cache = 0;
+#ifdef CONFIG_9P_FSCACHE
+ v9ses->cachetag = NULL;
+#endif
if (!opts)
return 0;
options = kstrdup(opts, GFP_KERNEL);
- if (!options) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "failed to allocate copy of option string\n");
- return -ENOMEM;
- }
+ if (!options)
+ goto fail_option_alloc;
while ((p = strsep(&options, ",")) != NULL) {
int token;
@@ -143,16 +149,33 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_cache_loose:
v9ses->cache = CACHE_LOOSE;
break;
+ case Opt_fscache:
+ v9ses->cache = CACHE_FSCACHE;
+ break;
+ case Opt_cachetag:
+#ifdef CONFIG_9P_FSCACHE
+ v9ses->cachetag = match_strdup(&args[0]);
+#endif
+ break;
+ case Opt_cache:
+ s = match_strdup(&args[0]);
+ if (!s)
+ goto fail_option_alloc;
+
+ if (strcmp(s, "loose") == 0)
+ v9ses->cache = CACHE_LOOSE;
+ else if (strcmp(s, "fscache") == 0)
+ v9ses->cache = CACHE_FSCACHE;
+ else
+ v9ses->cache = CACHE_NONE;
+ kfree(s);
+ break;
case Opt_access:
s = match_strdup(&args[0]);
- if (!s) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "failed to allocate copy"
- " of option argument\n");
- ret = -ENOMEM;
- break;
- }
+ if (!s)
+ goto fail_option_alloc;
+
v9ses->flags &= ~V9FS_ACCESS_MASK;
if (strcmp(s, "user") == 0)
v9ses->flags |= V9FS_ACCESS_USER;
@@ -173,6 +196,11 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
}
kfree(options);
return ret;
+
+fail_option_alloc:
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "failed to allocate copy of option argument\n");
+ return -ENOMEM;
}
/**
@@ -200,6 +228,10 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
return ERR_PTR(-ENOMEM);
}
+ spin_lock(&v9fs_sessionlist_lock);
+ list_add(&v9ses->slist, &v9fs_sessionlist);
+ spin_unlock(&v9fs_sessionlist_lock);
+
v9ses->flags = V9FS_EXTENDED | V9FS_ACCESS_USER;
strcpy(v9ses->uname, V9FS_DEFUSER);
strcpy(v9ses->aname, V9FS_DEFANAME);
@@ -249,6 +281,11 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
else
fid->uid = ~0;
+#ifdef CONFIG_9P_FSCACHE
+ /* register the session for caching */
+ v9fs_cache_session_get_cookie(v9ses);
+#endif
+
return fid;
error:
@@ -268,8 +305,18 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
v9ses->clnt = NULL;
}
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->fscache) {
+ v9fs_cache_session_put_cookie(v9ses);
+ kfree(v9ses->cachetag);
+ }
+#endif
__putname(v9ses->uname);
__putname(v9ses->aname);
+
+ spin_lock(&v9fs_sessionlist_lock);
+ list_del(&v9ses->slist);
+ spin_unlock(&v9fs_sessionlist_lock);
}
/**
@@ -286,25 +333,132 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
extern int v9fs_error_init(void);
+static struct kobject *v9fs_kobj;
+
+#ifdef CONFIG_9P_FSCACHE
/**
- * v9fs_init - Initialize module
+ * caches_show - list caches associated with a session
+ *
+ * Returns the size of buffer written.
+ */
+
+/*
+ * sysfs "caches" attribute: emit one line per registered session that
+ * has a cache tag.  Each tag is appended after the previous one, and the
+ * output is truncated so it never exceeds the PAGE_SIZE sysfs buffer.
+ * The session list is walked under v9fs_sessionlist_lock.
+ *
+ * Returns the number of bytes written to @buf.
+ */
+static ssize_t caches_show(struct kobject *kobj,
+			   struct kobj_attribute *attr,
+			   char *buf)
+{
+	ssize_t count = 0;
+	struct v9fs_session_info *v9ses;
+
+	spin_lock(&v9fs_sessionlist_lock);
+	list_for_each_entry(v9ses, &v9fs_sessionlist, slist) {
+		if (!v9ses->cachetag)
+			continue;
+
+		/*
+		 * scnprintf() returns the number of characters actually
+		 * stored, so count can never run past the buffer.
+		 */
+		count += scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
+				   v9ses->cachetag);
+	}
+
+	spin_unlock(&v9fs_sessionlist_lock);
+	return count;
+}
+
+static struct kobj_attribute v9fs_attr_cache = __ATTR_RO(caches);
+#endif /* CONFIG_9P_FSCACHE */
+
+static struct attribute *v9fs_attrs[] = {
+#ifdef CONFIG_9P_FSCACHE
+ &v9fs_attr_cache.attr,
+#endif
+ NULL,
+};
+
+static struct attribute_group v9fs_attr_group = {
+ .attrs = v9fs_attrs,
+};
+
+/**
+ * v9fs_sysfs_init - Initialize the v9fs sysfs interface
+ *
+ */
+
+/*
+ * Create the "9p" kobject under fs_kobj and attach the v9fs attribute
+ * group to it.  Returns 0 on success, -ENOMEM if the kobject cannot be
+ * created, or the error reported by sysfs_create_group().  On failure
+ * the kobject reference taken here has already been dropped.
+ */
+static int v9fs_sysfs_init(void)
+{
+	int err;
+
+	v9fs_kobj = kobject_create_and_add("9p", fs_kobj);
+	if (!v9fs_kobj)
+		return -ENOMEM;
+
+	err = sysfs_create_group(v9fs_kobj, &v9fs_attr_group);
+	if (err) {
+		kobject_put(v9fs_kobj);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * v9fs_sysfs_cleanup - Unregister the v9fs sysfs interface
+ *
+ */
+
+/*
+ * Undo v9fs_sysfs_init(): remove the attribute group and drop the
+ * kobject reference.  Must only be called after v9fs_sysfs_init() has
+ * succeeded, since it uses v9fs_kobj unconditionally.
+ */
+static void v9fs_sysfs_cleanup(void)
+{
+ sysfs_remove_group(v9fs_kobj, &v9fs_attr_group);
+ kobject_put(v9fs_kobj);
+}
+
+/**
+ * init_v9fs - Initialize module
*
*/
static int __init init_v9fs(void)
{
+	int err;
printk(KERN_INFO "Installing v9fs 9p2000 file system support\n");
/* TODO: Setup list of registered trasnport modules */
- return register_filesystem(&v9fs_fs_type);
+	/*
+	 * Register in three steps (filesystem, cache, sysfs) and unwind in
+	 * reverse order on failure.  Each error label undoes only the steps
+	 * that completed before the failing one.
+	 */
+	err = register_filesystem(&v9fs_fs_type);
+	if (err < 0) {
+		printk(KERN_ERR "Failed to register filesystem\n");
+		return err;
+	}
+
+	err = v9fs_cache_register();
+	if (err < 0) {
+		printk(KERN_ERR "Failed to register v9fs for caching\n");
+		goto out_fs_unreg;
+	}
+
+	/*
+	 * v9fs_sysfs_init() releases its own kobject on failure, so the
+	 * error path must not call v9fs_sysfs_cleanup(); it has to undo the
+	 * cache registration instead.
+	 */
+	err = v9fs_sysfs_init();
+	if (err < 0) {
+		printk(KERN_ERR "Failed to register with sysfs\n");
+		goto out_cache_unreg;
+	}
+
+	return 0;
+
+out_cache_unreg:
+	v9fs_cache_unregister();
+
+out_fs_unreg:
+	unregister_filesystem(&v9fs_fs_type);
+
+	return err;
}
/**
- * v9fs_init - shutdown module
+ * exit_v9fs - shutdown module
*
*/
static void __exit exit_v9fs(void)
{
+ v9fs_sysfs_cleanup();
+ v9fs_cache_unregister();
unregister_filesystem(&v9fs_fs_type);
}
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 38762bf102a..019f4ccb70c 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -51,6 +51,7 @@ enum p9_session_flags {
enum p9_cache_modes {
CACHE_NONE,
CACHE_LOOSE,
+ CACHE_FSCACHE,
};
/**
@@ -60,6 +61,8 @@ enum p9_cache_modes {
* @debug: debug level
* @afid: authentication handle
* @cache: cache mode of type &p9_cache_modes
+ * @cachetag: the tag of the cache associated with this session
+ * @fscache: session cookie associated with FS-Cache
* @options: copy of options string given by user
* @uname: string user name to mount hierarchy as
* @aname: mount specifier for remote hierarchy
@@ -68,7 +71,7 @@ enum p9_cache_modes {
* @dfltgid: default numeric groupid to mount hierarchy as
* @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy
* @clnt: reference to 9P network client instantiated for this session
- * @debugfs_dir: reference to debugfs_dir which can be used for add'l debug
+ * @slist: reference to list of registered 9p sessions
*
* This structure holds state for each session instance established during
* a sys_mount() .
@@ -84,6 +87,10 @@ struct v9fs_session_info {
unsigned short debug;
unsigned int afid;
unsigned int cache;
+#ifdef CONFIG_9P_FSCACHE
+ char *cachetag;
+ struct fscache_cookie *fscache;
+#endif
char *uname; /* user name to mount as */
char *aname; /* name of remote hierarchy being mounted */
@@ -92,11 +99,9 @@ struct v9fs_session_info {
unsigned int dfltgid; /* default gid for legacy support */
u32 uid; /* if ACCESS_SINGLE, the uid that has access */
struct p9_client *clnt; /* 9p client */
- struct dentry *debugfs_dir;
+ struct list_head slist; /* list of sessions registered with v9fs */
};
-extern struct dentry *v9fs_debugfs_root;
-
struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
char *);
void v9fs_session_close(struct v9fs_session_info *v9ses);
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index f0c7de78e20..3a7560e3586 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -44,7 +44,13 @@ extern const struct file_operations v9fs_dir_operations;
extern const struct dentry_operations v9fs_dentry_operations;
extern const struct dentry_operations v9fs_cached_dentry_operations;
+#ifdef CONFIG_9P_FSCACHE
+struct inode *v9fs_alloc_inode(struct super_block *sb);
+void v9fs_destroy_inode(struct inode *inode);
+#endif
+
struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+void v9fs_clear_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
int v9fs_dir_release(struct inode *inode, struct file *filp);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 92828281a30..90e38449f4b 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -38,6 +38,7 @@
#include "v9fs.h"
#include "v9fs_vfs.h"
+#include "cache.h"
/**
* v9fs_vfs_readpage - read an entire page in from 9P
@@ -52,18 +53,31 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
int retval;
loff_t offset;
char *buffer;
+ struct inode *inode;
+ inode = page->mapping->host;
P9_DPRINTK(P9_DEBUG_VFS, "\n");
+
+ BUG_ON(!PageLocked(page));
+
+ retval = v9fs_readpage_from_fscache(inode, page);
+ if (retval == 0)
+ return retval;
+
buffer = kmap(page);
offset = page_offset(page);
retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset);
- if (retval < 0)
+ if (retval < 0) {
+ v9fs_uncache_page(inode, page);
goto done;
+ }
memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval);
flush_dcache_page(page);
SetPageUptodate(page);
+
+ v9fs_readpage_to_fscache(inode, page);
retval = 0;
done:
@@ -72,6 +86,78 @@ done:
return retval;
}
+/**
+ * v9fs_vfs_readpages - read a set of pages from 9P
+ *
+ * @filp: file being read
+ * @mapping: the address space
+ * @pages: list of pages to read
+ * @nr_pages: count of pages to read
+ *
+ */
+
+static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping,
+			      struct list_head *pages, unsigned nr_pages)
+{
+	struct inode *inode = mapping->host;
+	int ret;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp);
+
+	/* Try the cache first; a zero result means the read was submitted. */
+	ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages);
+	if (ret != 0) {
+		/* Otherwise read each page through v9fs_vfs_readpage(). */
+		ret = read_cache_pages(mapping, pages,
+				       (void *)v9fs_vfs_readpage, filp);
+		P9_DPRINTK(P9_DEBUG_VFS, " = %d\n", ret);
+	}
+
+	return ret;
+}
+
+/**
+ * v9fs_release_page - release the private state associated with a page
+ *
+ * Returns 1 if the page can be released, 0 otherwise.
+ */
+
+static int v9fs_release_page(struct page *page, gfp_t gfp)
+{
+	/* Pages with private state are never released from here. */
+	return PagePrivate(page) ? 0 : v9fs_fscache_release_page(page, gfp);
+}
+
+/**
+ * v9fs_invalidate_page - Invalidate a page completely or partially
+ *
+ * @page: structure to page
+ * @offset: offset in the page
+ */
+
+static void v9fs_invalidate_page(struct page *page, unsigned long offset)
+{
+	/* Only a whole-page invalidation (offset 0) touches the cache. */
+	if (offset != 0)
+		return;
+
+	v9fs_fscache_invalidate_page(page);
+}
+
+/**
+ * v9fs_launder_page - Writeback a dirty page
+ * Since the writes go directly to the server, we simply return a 0
+ * here to indicate success.
+ *
+ * Returns 0 on success.
+ */
+
+static int v9fs_launder_page(struct page *page)
+{
+ /* Nothing to write back here; unconditionally report success. */
+ return 0;
+}
+
const struct address_space_operations v9fs_addr_operations = {
.readpage = v9fs_vfs_readpage,
+ .readpages = v9fs_vfs_readpages,
+ .releasepage = v9fs_release_page,
+ .invalidatepage = v9fs_invalidate_page,
+ .launder_page = v9fs_launder_page,
};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 68bf2af6c38..3902bf43a08 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -32,6 +32,7 @@
#include <linux/string.h>
#include <linux/inet.h>
#include <linux/list.h>
+#include <linux/pagemap.h>
#include <asm/uaccess.h>
#include <linux/idr.h>
#include <net/9p/9p.h>
@@ -40,6 +41,7 @@
#include "v9fs.h"
#include "v9fs_vfs.h"
#include "fid.h"
+#include "cache.h"
static const struct file_operations v9fs_cached_file_operations;
@@ -72,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
return err;
}
if (omode & P9_OTRUNC) {
- inode->i_size = 0;
+ i_size_write(inode, 0);
inode->i_blocks = 0;
}
if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses)))
@@ -85,6 +87,10 @@ int v9fs_file_open(struct inode *inode, struct file *file)
/* enable cached file options */
if(file->f_op == &v9fs_file_operations)
file->f_op = &v9fs_cached_file_operations;
+
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_cache_inode_set_cookie(inode, file);
+#endif
}
return 0;
@@ -210,6 +216,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
struct p9_client *clnt;
struct inode *inode = filp->f_path.dentry->d_inode;
int origin = *offset;
+ unsigned long pg_start, pg_end;
P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
(int)count, (int)*offset);
@@ -225,7 +232,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
if (count < rsize)
rsize = count;
- n = p9_client_write(fid, NULL, data+total, *offset+total,
+ n = p9_client_write(fid, NULL, data+total, origin+total,
rsize);
if (n <= 0)
break;
@@ -234,14 +241,14 @@ v9fs_file_write(struct file *filp, const char __user * data,
} while (count > 0);
if (total > 0) {
- invalidate_inode_pages2_range(inode->i_mapping, origin,
- origin+total);
+ pg_start = origin >> PAGE_CACHE_SHIFT;
+ pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
+ if (inode->i_mapping && inode->i_mapping->nrpages)
+ invalidate_inode_pages2_range(inode->i_mapping,
+ pg_start, pg_end);
*offset += total;
- }
-
- if (*offset > inode->i_size) {
- inode->i_size = *offset;
- inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
+ i_size_write(inode, i_size_read(inode) + total);
+ inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
}
if (n < 0)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 06a223d50a8..5947628aefe 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -40,6 +40,7 @@
#include "v9fs.h"
#include "v9fs_vfs.h"
#include "fid.h"
+#include "cache.h"
static const struct inode_operations v9fs_dir_inode_operations;
static const struct inode_operations v9fs_dir_inode_operations_ext;
@@ -197,6 +198,39 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
wstat->extension = NULL;
}
+#ifdef CONFIG_9P_FSCACHE
+/**
+ * v9fs_alloc_inode - helper function to allocate an inode
+ * This callback is executed before setting up the inode so that we
+ * can associate a vcookie with each inode.
+ *
+ */
+
+struct inode *v9fs_alloc_inode(struct super_block *sb)
+{
+	struct v9fs_cookie *vc = kmem_cache_alloc(vcookie_cache, GFP_KERNEL);
+
+	if (vc == NULL)
+		return NULL;
+
+	/* Start with no cookie and no qid; hand back the embedded inode. */
+	spin_lock_init(&vc->lock);
+	vc->qid = NULL;
+	vc->fscache = NULL;
+	return &vc->inode;
+}
+
+/**
+ * v9fs_destroy_inode - destroy an inode
+ *
+ */
+
+void v9fs_destroy_inode(struct inode *inode)
+{
+ /* The inode is embedded in a v9fs_cookie; free the whole container. */
+ kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
+}
+#endif
+
/**
* v9fs_get_inode - helper function to setup an inode
* @sb: superblock
@@ -326,6 +360,21 @@ error:
}
*/
+
+/**
+ * v9fs_clear_inode - release an inode
+ * @inode: inode to release
+ *
+ */
+void v9fs_clear_inode(struct inode *inode)
+{
+ /* Start writeback of the inode's mapping before the inode goes away. */
+ filemap_fdatawrite(inode->i_mapping);
+
+#ifdef CONFIG_9P_FSCACHE
+ /* Drop the inode's FS-Cache cookie, if it has one. */
+ v9fs_cache_inode_put_cookie(inode);
+#endif
+}
+
/**
* v9fs_inode_from_fid - populate an inode by issuing a attribute request
* @v9ses: session information
@@ -356,8 +405,14 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
v9fs_stat2inode(st, ret, sb);
ret->i_ino = v9fs_qid2ino(&st->qid);
+
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_vcookie_set_qid(ret, &st->qid);
+ v9fs_cache_inode_get_cookie(ret);
+#endif
p9stat_free(st);
kfree(st);
+
return ret;
error:
@@ -751,7 +806,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
err = -EPERM;
v9ses = v9fs_inode2v9ses(dentry->d_inode);
- if (v9ses->cache == CACHE_LOOSE)
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
return simple_getattr(mnt, dentry, stat);
fid = v9fs_fid_lookup(dentry);
@@ -872,10 +927,10 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
} else
inode->i_rdev = 0;
- inode->i_size = stat->length;
+ i_size_write(inode, stat->length);
/* not real number of blocks, but 512 byte ones ... */
- inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
+ inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
}
/**
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 8961f1a8f66..14a86448572 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -44,21 +44,9 @@
#include "v9fs_vfs.h"
#include "fid.h"
-static void v9fs_clear_inode(struct inode *);
static const struct super_operations v9fs_super_ops;
/**
- * v9fs_clear_inode - release an inode
- * @inode: inode to release
- *
- */
-
-static void v9fs_clear_inode(struct inode *inode)
-{
- filemap_fdatawrite(inode->i_mapping);
-}
-
-/**
* v9fs_set_super - set the superblock
* @s: super block
* @data: file system specific data
@@ -220,6 +208,10 @@ v9fs_umount_begin(struct super_block *sb)
}
static const struct super_operations v9fs_super_ops = {
+#ifdef CONFIG_9P_FSCACHE
+ .alloc_inode = v9fs_alloc_inode,
+ .destroy_inode = v9fs_destroy_inode,
+#endif
.statfs = simple_statfs,
.clear_inode = v9fs_clear_inode,
.show_options = generic_show_options,
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 798cb071d13..3f57ce4bee5 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -19,9 +19,6 @@ static int
adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh,
int create)
{
- if (block < 0)
- goto abort_negative;
-
if (!create) {
if (block >= inode->i_blocks)
goto abort_toobig;
@@ -34,10 +31,6 @@ adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh,
/* don't support allocation of blocks yet */
return -EIO;
-abort_negative:
- adfs_error(inode->i_sb, "block %d < 0", block);
- return -EIO;
-
abort_toobig:
return 0;
}
diff --git a/fs/attr.c b/fs/attr.c
index 9fe1b1bd30a..96d394bdadd 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,7 +18,7 @@
/* Taken over from the old code... */
/* POSIX UID/GID verification for setting inode attributes. */
-int inode_change_ok(struct inode *inode, struct iattr *attr)
+int inode_change_ok(const struct inode *inode, struct iattr *attr)
{
int retval = -EPERM;
unsigned int ia_valid = attr->ia_valid;
@@ -60,9 +60,51 @@ fine:
error:
return retval;
}
-
EXPORT_SYMBOL(inode_change_ok);
+/**
+ * inode_newsize_ok - may this inode be truncated to a given size
+ * @inode: the inode to be truncated
+ * @offset: the new size to assign to the inode
+ * @Returns: 0 on success, -ve errno on failure
+ *
+ * inode_newsize_ok will check filesystem limits and ulimits to check that the
+ * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
+ * when necessary. Caller must not proceed with inode size change if failure is
+ * returned. @inode must be a file (not directory), with appropriate
+ * permissions to allow truncate (inode_newsize_ok does NOT check these
+ * conditions).
+ *
+ * inode_newsize_ok must be called with i_mutex held.
+ */
+int inode_newsize_ok(const struct inode *inode, loff_t offset)
+{
+	unsigned long limit;
+
+	if (inode->i_size >= offset) {
+		/*
+		 * Not growing the file.  Truncation of in-use swapfiles is
+		 * disallowed - it would cause subsequent swapout to scribble
+		 * on the now-freed blocks.
+		 */
+		if (IS_SWAPFILE(inode))
+			return -ETXTBSY;
+		return 0;
+	}
+
+	/* Growing the file: check the rlimit first, then the fs maximum. */
+	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	if (limit != RLIM_INFINITY && offset > limit) {
+		send_sig(SIGXFSZ, current, 0);
+		return -EFBIG;
+	}
+
+	if (offset > inode->i_sb->s_maxbytes)
+		return -EFBIG;
+
+	return 0;
+}
+EXPORT_SYMBOL(inode_newsize_ok);
+
int inode_setattr(struct inode * inode, struct iattr * attr)
{
unsigned int ia_valid = attr->ia_valid;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index dd376c124e7..33baf27fac7 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -737,12 +737,7 @@ befs_put_super(struct super_block *sb)
{
kfree(BEFS_SB(sb)->mount_opts.iocharset);
BEFS_SB(sb)->mount_opts.iocharset = NULL;
-
- if (BEFS_SB(sb)->nls) {
- unload_nls(BEFS_SB(sb)->nls);
- BEFS_SB(sb)->nls = NULL;
- }
-
+ unload_nls(BEFS_SB(sb)->nls);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 442d94fe255..b9b3bb51b1e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1711,42 +1711,52 @@ struct elf_note_info {
int numnote;
};
-static int fill_note_info(struct elfhdr *elf, int phdrs,
- struct elf_note_info *info,
- long signr, struct pt_regs *regs)
+static int elf_note_info_init(struct elf_note_info *info)
{
-#define NUM_NOTES 6
- struct list_head *t;
-
- info->notes = NULL;
- info->prstatus = NULL;
- info->psinfo = NULL;
- info->fpu = NULL;
-#ifdef ELF_CORE_COPY_XFPREGS
- info->xfpu = NULL;
-#endif
+ memset(info, 0, sizeof(*info));
INIT_LIST_HEAD(&info->thread_list);
- info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
- GFP_KERNEL);
+ /* Allocate space for six ELF notes */
+ info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
if (!info->notes)
return 0;
info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
if (!info->psinfo)
- return 0;
+ goto notes_free;
info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
if (!info->prstatus)
- return 0;
+ goto psinfo_free;
info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
if (!info->fpu)
- return 0;
+ goto prstatus_free;
#ifdef ELF_CORE_COPY_XFPREGS
info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
if (!info->xfpu)
- return 0;
+ goto fpu_free;
+#endif
+ return 1;
+#ifdef ELF_CORE_COPY_XFPREGS
+ fpu_free:
+ kfree(info->fpu);
#endif
+ prstatus_free:
+ kfree(info->prstatus);
+ psinfo_free:
+ kfree(info->psinfo);
+ notes_free:
+ kfree(info->notes);
+ return 0;
+}
+
+static int fill_note_info(struct elfhdr *elf, int phdrs,
+ struct elf_note_info *info,
+ long signr, struct pt_regs *regs)
+{
+ struct list_head *t;
+
+ if (!elf_note_info_init(info))
+ return 0;
- info->thread_status_size = 0;
if (signr) {
struct core_thread *ct;
struct elf_thread_status *ets;
@@ -1806,8 +1816,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
#endif
return 1;
-
-#undef NUM_NOTES
}
static size_t get_note_info_size(struct elf_note_info *info)
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 76285471073..38502c67987 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -283,20 +283,23 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
}
stack_size = exec_params.stack_size;
- if (stack_size < interp_params.stack_size)
- stack_size = interp_params.stack_size;
-
if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
executable_stack = EXSTACK_ENABLE_X;
else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
executable_stack = EXSTACK_DISABLE_X;
- else if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
- executable_stack = EXSTACK_ENABLE_X;
- else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
- executable_stack = EXSTACK_DISABLE_X;
else
executable_stack = EXSTACK_DEFAULT;
+ if (stack_size == 0) {
+ stack_size = interp_params.stack_size;
+ if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
+ executable_stack = EXSTACK_ENABLE_X;
+ else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
+ executable_stack = EXSTACK_DISABLE_X;
+ else
+ executable_stack = EXSTACK_DEFAULT;
+ }
+
retval = -ENOEXEC;
if (stack_size == 0)
goto error;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e92f229e3c6..a2796651e75 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -278,8 +278,6 @@ static int decompress_exec(
ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos);
if (ret <= 0)
break;
- if (ret >= (unsigned long) -4096)
- break;
len -= ret;
strm.next_in = buf;
@@ -335,7 +333,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
"(%d != %d)", (unsigned) r, curid, id);
goto failed;
} else if ( ! p->lib_list[id].loaded &&
- load_flat_shared_library(id, p) > (unsigned long) -4096) {
+ IS_ERR_VALUE(load_flat_shared_library(id, p))) {
printk("BINFMT_FLAT: failed to load library %d", id);
goto failed;
}
@@ -545,7 +543,7 @@ static int load_flat_file(struct linux_binprm * bprm,
textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_EXECUTABLE, 0);
up_write(&current->mm->mmap_sem);
- if (!textpos || textpos >= (unsigned long) -4096) {
+ if (!textpos || IS_ERR_VALUE(textpos)) {
if (!textpos)
textpos = (unsigned long) -ENOMEM;
printk("Unable to mmap process text, errno %d\n", (int)-textpos);
@@ -560,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
up_write(&current->mm->mmap_sem);
- if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) {
+ if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) {
if (!realdatastart)
realdatastart = (unsigned long) -ENOMEM;
printk("Unable to allocate RAM for process data, errno %d\n",
@@ -587,7 +585,7 @@ static int load_flat_file(struct linux_binprm * bprm,
result = bprm->file->f_op->read(bprm->file, (char *) datapos,
data_len + (relocs * sizeof(unsigned long)), &fpos);
}
- if (result >= (unsigned long)-4096) {
+ if (IS_ERR_VALUE(result)) {
printk("Unable to read data+bss, errno %d\n", (int)-result);
do_munmap(current->mm, textpos, text_len);
do_munmap(current->mm, realdatastart, data_len + extra);
@@ -607,7 +605,7 @@ static int load_flat_file(struct linux_binprm * bprm,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
up_write(&current->mm->mmap_sem);
- if (!textpos || textpos >= (unsigned long) -4096) {
+ if (!textpos || IS_ERR_VALUE(textpos)) {
if (!textpos)
textpos = (unsigned long) -ENOMEM;
printk("Unable to allocate RAM for process text/data, errno %d\n",
@@ -641,7 +639,7 @@ static int load_flat_file(struct linux_binprm * bprm,
fpos = 0;
result = bprm->file->f_op->read(bprm->file,
(char *) textpos, text_len, &fpos);
- if (result < (unsigned long) -4096)
+ if (!IS_ERR_VALUE(result))
result = decompress_exec(bprm, text_len, (char *) datapos,
data_len + (relocs * sizeof(unsigned long)), 0);
}
@@ -651,13 +649,13 @@ static int load_flat_file(struct linux_binprm * bprm,
fpos = 0;
result = bprm->file->f_op->read(bprm->file,
(char *) textpos, text_len, &fpos);
- if (result < (unsigned long) -4096) {
+ if (!IS_ERR_VALUE(result)) {
fpos = ntohl(hdr->data_start);
result = bprm->file->f_op->read(bprm->file, (char *) datapos,
data_len + (relocs * sizeof(unsigned long)), &fpos);
}
}
- if (result >= (unsigned long)-4096) {
+ if (IS_ERR_VALUE(result)) {
printk("Unable to read code+data+bss, errno %d\n",(int)-result);
do_munmap(current->mm, textpos, text_len + data_len + extra +
MAX_SHARED_LIBS * sizeof(unsigned long));
@@ -835,7 +833,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
res = prepare_binprm(&bprm);
- if (res <= (unsigned long)-4096)
+ if (!IS_ERR_VALUE(res))
res = load_flat_file(&bprm, libs, id, NULL);
abort_creds(bprm.cred);
@@ -880,7 +878,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */
res = load_flat_file(bprm, &libinfo, 0, &stack_len);
- if (res > (unsigned long)-4096)
+ if (IS_ERR_VALUE(res))
return res;
/* Update data segment pointers for all libraries */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5d1ed50bd46..9cf4b926f8e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -216,8 +216,6 @@ EXPORT_SYMBOL(fsync_bdev);
* freeze_bdev -- lock a filesystem and force it into a consistent state
* @bdev: blockdevice to lock
*
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
* If a superblock is found on this device, we take the s_umount semaphore
* on it to make sure nobody unmounts until the snapshot creation is done.
* The reference counter (bd_fsfreeze_count) guarantees that only the last
@@ -232,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev)
int error = 0;
mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (bdev->bd_fsfreeze_count > 0) {
- bdev->bd_fsfreeze_count++;
+ if (++bdev->bd_fsfreeze_count > 1) {
+ /*
+ * We don't even need to grab a reference - the first call
+ * to freeze_bdev grabs an active reference and only the last
+ * thaw_bdev drops it.
+ */
sb = get_super(bdev);
+ drop_super(sb);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return sb;
}
- bdev->bd_fsfreeze_count++;
-
- down(&bdev->bd_mount_sem);
- sb = get_super(bdev);
- if (sb && !(sb->s_flags & MS_RDONLY)) {
- sb->s_frozen = SB_FREEZE_WRITE;
- smp_wmb();
-
- sync_filesystem(sb);
-
- sb->s_frozen = SB_FREEZE_TRANS;
- smp_wmb();
-
- sync_blockdev(sb->s_bdev);
-
- if (sb->s_op->freeze_fs) {
- error = sb->s_op->freeze_fs(sb);
- if (error) {
- printk(KERN_ERR
- "VFS:Filesystem freeze failed\n");
- sb->s_frozen = SB_UNFROZEN;
- drop_super(sb);
- up(&bdev->bd_mount_sem);
- bdev->bd_fsfreeze_count--;
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return ERR_PTR(error);
- }
+
+ sb = get_active_super(bdev);
+ if (!sb)
+ goto out;
+ if (sb->s_flags & MS_RDONLY) {
+ deactivate_locked_super(sb);
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return sb;
+ }
+
+ sb->s_frozen = SB_FREEZE_WRITE;
+ smp_wmb();
+
+ sync_filesystem(sb);
+
+ sb->s_frozen = SB_FREEZE_TRANS;
+ smp_wmb();
+
+ sync_blockdev(sb->s_bdev);
+
+ if (sb->s_op->freeze_fs) {
+ error = sb->s_op->freeze_fs(sb);
+ if (error) {
+ printk(KERN_ERR
+ "VFS:Filesystem freeze failed\n");
+ sb->s_frozen = SB_UNFROZEN;
+ deactivate_locked_super(sb);
+ bdev->bd_fsfreeze_count--;
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return ERR_PTR(error);
}
}
+ up_write(&sb->s_umount);
+ out:
sync_blockdev(bdev);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
-
- return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */
+ return sb; /* thaw_bdev drops the active reference */
}
EXPORT_SYMBOL(freeze_bdev);
@@ -284,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev);
*/
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
- int error = 0;
+ int error = -EINVAL;
mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (!bdev->bd_fsfreeze_count) {
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return -EINVAL;
- }
-
- bdev->bd_fsfreeze_count--;
- if (bdev->bd_fsfreeze_count > 0) {
- if (sb)
- drop_super(sb);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return 0;
- }
-
- if (sb) {
- BUG_ON(sb->s_bdev != bdev);
- if (!(sb->s_flags & MS_RDONLY)) {
- if (sb->s_op->unfreeze_fs) {
- error = sb->s_op->unfreeze_fs(sb);
- if (error) {
- printk(KERN_ERR
- "VFS:Filesystem thaw failed\n");
- sb->s_frozen = SB_FREEZE_TRANS;
- bdev->bd_fsfreeze_count++;
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return error;
- }
- }
- sb->s_frozen = SB_UNFROZEN;
- smp_wmb();
- wake_up(&sb->s_wait_unfrozen);
+ if (!bdev->bd_fsfreeze_count)
+ goto out_unlock;
+
+ error = 0;
+ if (--bdev->bd_fsfreeze_count > 0)
+ goto out_unlock;
+
+ if (!sb)
+ goto out_unlock;
+
+ BUG_ON(sb->s_bdev != bdev);
+ down_write(&sb->s_umount);
+ if (sb->s_flags & MS_RDONLY)
+ goto out_deactivate;
+
+ if (sb->s_op->unfreeze_fs) {
+ error = sb->s_op->unfreeze_fs(sb);
+ if (error) {
+ printk(KERN_ERR
+ "VFS:Filesystem thaw failed\n");
+ sb->s_frozen = SB_FREEZE_TRANS;
+ bdev->bd_fsfreeze_count++;
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return error;
}
- drop_super(sb);
}
- up(&bdev->bd_mount_sem);
+ sb->s_frozen = SB_UNFROZEN;
+ smp_wmb();
+ wake_up(&sb->s_wait_unfrozen);
+
+out_deactivate:
+ if (sb)
+ deactivate_locked_super(sb);
+out_unlock:
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return 0;
}
@@ -430,7 +437,6 @@ static void init_once(void *foo)
memset(bdev, 0, sizeof(*bdev));
mutex_init(&bdev->bd_mutex);
- sema_init(&bdev->bd_mount_sem, 1);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 019e8af449a..282ca085c2f 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -48,6 +48,9 @@ struct btrfs_worker_thread {
/* number of things on the pending list */
atomic_t num_pending;
+ /* reference counter for this struct */
+ atomic_t refs;
+
unsigned long sequence;
/* protects the pending list. */
@@ -71,7 +74,12 @@ static void check_idle_worker(struct btrfs_worker_thread *worker)
unsigned long flags;
spin_lock_irqsave(&worker->workers->lock, flags);
worker->idle = 1;
- list_move(&worker->worker_list, &worker->workers->idle_list);
+
+ /* the list may be empty if the worker is just starting */
+ if (!list_empty(&worker->worker_list)) {
+ list_move(&worker->worker_list,
+ &worker->workers->idle_list);
+ }
spin_unlock_irqrestore(&worker->workers->lock, flags);
}
}
@@ -87,23 +95,49 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
unsigned long flags;
spin_lock_irqsave(&worker->workers->lock, flags);
worker->idle = 0;
- list_move_tail(&worker->worker_list,
- &worker->workers->worker_list);
+
+ if (!list_empty(&worker->worker_list)) {
+ list_move_tail(&worker->worker_list,
+ &worker->workers->worker_list);
+ }
spin_unlock_irqrestore(&worker->workers->lock, flags);
}
}
-static noinline int run_ordered_completions(struct btrfs_workers *workers,
- struct btrfs_work *work)
+static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
{
+ struct btrfs_workers *workers = worker->workers;
unsigned long flags;
+ rmb();
+ if (!workers->atomic_start_pending)
+ return;
+
+ spin_lock_irqsave(&workers->lock, flags);
+ if (!workers->atomic_start_pending)
+ goto out;
+
+ workers->atomic_start_pending = 0;
+ if (workers->num_workers >= workers->max_workers)
+ goto out;
+
+ spin_unlock_irqrestore(&workers->lock, flags);
+ btrfs_start_workers(workers, 1);
+ return;
+
+out:
+ spin_unlock_irqrestore(&workers->lock, flags);
+}
+
+static noinline int run_ordered_completions(struct btrfs_workers *workers,
+ struct btrfs_work *work)
+{
if (!workers->ordered)
return 0;
set_bit(WORK_DONE_BIT, &work->flags);
- spin_lock_irqsave(&workers->lock, flags);
+ spin_lock(&workers->order_lock);
while (1) {
if (!list_empty(&workers->prio_order_list)) {
@@ -126,45 +160,118 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
break;
- spin_unlock_irqrestore(&workers->lock, flags);
+ spin_unlock(&workers->order_lock);
work->ordered_func(work);
/* now take the lock again and call the freeing code */
- spin_lock_irqsave(&workers->lock, flags);
+ spin_lock(&workers->order_lock);
list_del(&work->order_list);
work->ordered_free(work);
}
- spin_unlock_irqrestore(&workers->lock, flags);
+ spin_unlock(&workers->order_lock);
return 0;
}
+static void put_worker(struct btrfs_worker_thread *worker)
+{
+ if (atomic_dec_and_test(&worker->refs))
+ kfree(worker);
+}
+
+static int try_worker_shutdown(struct btrfs_worker_thread *worker)
+{
+ int freeit = 0;
+
+ spin_lock_irq(&worker->lock);
+ spin_lock(&worker->workers->lock);
+ if (worker->workers->num_workers > 1 &&
+ worker->idle &&
+ !worker->working &&
+ !list_empty(&worker->worker_list) &&
+ list_empty(&worker->prio_pending) &&
+ list_empty(&worker->pending) &&
+ atomic_read(&worker->num_pending) == 0) {
+ freeit = 1;
+ list_del_init(&worker->worker_list);
+ worker->workers->num_workers--;
+ }
+ spin_unlock(&worker->workers->lock);
+ spin_unlock_irq(&worker->lock);
+
+ if (freeit)
+ put_worker(worker);
+ return freeit;
+}
+
+static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
+ struct list_head *prio_head,
+ struct list_head *head)
+{
+ struct btrfs_work *work = NULL;
+ struct list_head *cur = NULL;
+
+ if(!list_empty(prio_head))
+ cur = prio_head->next;
+
+ smp_mb();
+ if (!list_empty(&worker->prio_pending))
+ goto refill;
+
+ if (!list_empty(head))
+ cur = head->next;
+
+ if (cur)
+ goto out;
+
+refill:
+ spin_lock_irq(&worker->lock);
+ list_splice_tail_init(&worker->prio_pending, prio_head);
+ list_splice_tail_init(&worker->pending, head);
+
+ if (!list_empty(prio_head))
+ cur = prio_head->next;
+ else if (!list_empty(head))
+ cur = head->next;
+ spin_unlock_irq(&worker->lock);
+
+ if (!cur)
+ goto out_fail;
+
+out:
+ work = list_entry(cur, struct btrfs_work, list);
+
+out_fail:
+ return work;
+}
+
/*
* main loop for servicing work items
*/
static int worker_loop(void *arg)
{
struct btrfs_worker_thread *worker = arg;
- struct list_head *cur;
+ struct list_head head;
+ struct list_head prio_head;
struct btrfs_work *work;
+
+ INIT_LIST_HEAD(&head);
+ INIT_LIST_HEAD(&prio_head);
+
do {
- spin_lock_irq(&worker->lock);
-again_locked:
+again:
while (1) {
- if (!list_empty(&worker->prio_pending))
- cur = worker->prio_pending.next;
- else if (!list_empty(&worker->pending))
- cur = worker->pending.next;
- else
+
+
+ work = get_next_work(worker, &prio_head, &head);
+ if (!work)
break;
- work = list_entry(cur, struct btrfs_work, list);
list_del(&work->list);
clear_bit(WORK_QUEUED_BIT, &work->flags);
work->worker = worker;
- spin_unlock_irq(&worker->lock);
work->func(work);
@@ -175,9 +282,13 @@ again_locked:
*/
run_ordered_completions(worker->workers, work);
- spin_lock_irq(&worker->lock);
- check_idle_worker(worker);
+ check_pending_worker_creates(worker);
+
}
+
+ spin_lock_irq(&worker->lock);
+ check_idle_worker(worker);
+
if (freezing(current)) {
worker->working = 0;
spin_unlock_irq(&worker->lock);
@@ -216,8 +327,10 @@ again_locked:
spin_lock_irq(&worker->lock);
set_current_state(TASK_INTERRUPTIBLE);
if (!list_empty(&worker->pending) ||
- !list_empty(&worker->prio_pending))
- goto again_locked;
+ !list_empty(&worker->prio_pending)) {
+ spin_unlock_irq(&worker->lock);
+ goto again;
+ }
/*
* this makes sure we get a wakeup when someone
@@ -226,8 +339,13 @@ again_locked:
worker->working = 0;
spin_unlock_irq(&worker->lock);
- if (!kthread_should_stop())
- schedule();
+ if (!kthread_should_stop()) {
+ schedule_timeout(HZ * 120);
+ if (!worker->working &&
+ try_worker_shutdown(worker)) {
+ return 0;
+ }
+ }
}
__set_current_state(TASK_RUNNING);
}
@@ -242,16 +360,30 @@ int btrfs_stop_workers(struct btrfs_workers *workers)
{
struct list_head *cur;
struct btrfs_worker_thread *worker;
+ int can_stop;
+ spin_lock_irq(&workers->lock);
list_splice_init(&workers->idle_list, &workers->worker_list);
while (!list_empty(&workers->worker_list)) {
cur = workers->worker_list.next;
worker = list_entry(cur, struct btrfs_worker_thread,
worker_list);
- kthread_stop(worker->task);
- list_del(&worker->worker_list);
- kfree(worker);
+
+ atomic_inc(&worker->refs);
+ workers->num_workers -= 1;
+ if (!list_empty(&worker->worker_list)) {
+ list_del_init(&worker->worker_list);
+ put_worker(worker);
+ can_stop = 1;
+ } else
+ can_stop = 0;
+ spin_unlock_irq(&workers->lock);
+ if (can_stop)
+ kthread_stop(worker->task);
+ spin_lock_irq(&workers->lock);
+ put_worker(worker);
}
+ spin_unlock_irq(&workers->lock);
return 0;
}
@@ -266,10 +398,13 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
INIT_LIST_HEAD(&workers->order_list);
INIT_LIST_HEAD(&workers->prio_order_list);
spin_lock_init(&workers->lock);
+ spin_lock_init(&workers->order_lock);
workers->max_workers = max;
workers->idle_thresh = 32;
workers->name = name;
workers->ordered = 0;
+ workers->atomic_start_pending = 0;
+ workers->atomic_worker_start = 0;
}
/*
@@ -293,7 +428,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
INIT_LIST_HEAD(&worker->prio_pending);
INIT_LIST_HEAD(&worker->worker_list);
spin_lock_init(&worker->lock);
+
atomic_set(&worker->num_pending, 0);
+ atomic_set(&worker->refs, 1);
worker->workers = workers;
worker->task = kthread_run(worker_loop, worker,
"btrfs-%s-%d", workers->name,
@@ -303,7 +440,6 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
kfree(worker);
goto fail;
}
-
spin_lock_irq(&workers->lock);
list_add_tail(&worker->worker_list, &workers->idle_list);
worker->idle = 1;
@@ -350,7 +486,6 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
*/
next = workers->worker_list.next;
worker = list_entry(next, struct btrfs_worker_thread, worker_list);
- atomic_inc(&worker->num_pending);
worker->sequence++;
if (worker->sequence % workers->idle_thresh == 0)
@@ -367,28 +502,18 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
{
struct btrfs_worker_thread *worker;
unsigned long flags;
+ struct list_head *fallback;
again:
spin_lock_irqsave(&workers->lock, flags);
worker = next_worker(workers);
- spin_unlock_irqrestore(&workers->lock, flags);
if (!worker) {
- spin_lock_irqsave(&workers->lock, flags);
if (workers->num_workers >= workers->max_workers) {
- struct list_head *fallback = NULL;
- /*
- * we have failed to find any workers, just
- * return the force one
- */
- if (!list_empty(&workers->worker_list))
- fallback = workers->worker_list.next;
- if (!list_empty(&workers->idle_list))
- fallback = workers->idle_list.next;
- BUG_ON(!fallback);
- worker = list_entry(fallback,
- struct btrfs_worker_thread, worker_list);
- spin_unlock_irqrestore(&workers->lock, flags);
+ goto fallback;
+ } else if (workers->atomic_worker_start) {
+ workers->atomic_start_pending = 1;
+ goto fallback;
} else {
spin_unlock_irqrestore(&workers->lock, flags);
/* we're below the limit, start another worker */
@@ -396,6 +521,28 @@ again:
goto again;
}
}
+ goto found;
+
+fallback:
+ fallback = NULL;
+ /*
+ * we have failed to find any workers, just
+ * return the first one we can find.
+ */
+ if (!list_empty(&workers->worker_list))
+ fallback = workers->worker_list.next;
+ if (!list_empty(&workers->idle_list))
+ fallback = workers->idle_list.next;
+ BUG_ON(!fallback);
+ worker = list_entry(fallback,
+ struct btrfs_worker_thread, worker_list);
+found:
+ /*
+ * this makes sure the worker doesn't exit before it is placed
+ * onto a busy/idle list
+ */
+ atomic_inc(&worker->num_pending);
+ spin_unlock_irqrestore(&workers->lock, flags);
return worker;
}
@@ -427,7 +574,7 @@ int btrfs_requeue_work(struct btrfs_work *work)
spin_lock(&worker->workers->lock);
worker->idle = 0;
list_move_tail(&worker->worker_list,
- &worker->workers->worker_list);
+ &worker->workers->worker_list);
spin_unlock(&worker->workers->lock);
}
if (!worker->working) {
@@ -435,9 +582,9 @@ int btrfs_requeue_work(struct btrfs_work *work)
worker->working = 1;
}
- spin_unlock_irqrestore(&worker->lock, flags);
if (wake)
wake_up_process(worker->task);
+ spin_unlock_irqrestore(&worker->lock, flags);
out:
return 0;
@@ -463,14 +610,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
worker = find_worker(workers);
if (workers->ordered) {
- spin_lock_irqsave(&workers->lock, flags);
+ /*
+ * you're not allowed to do ordered queues from an
+ * interrupt handler
+ */
+ spin_lock(&workers->order_lock);
if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
list_add_tail(&work->order_list,
&workers->prio_order_list);
} else {
list_add_tail(&work->order_list, &workers->order_list);
}
- spin_unlock_irqrestore(&workers->lock, flags);
+ spin_unlock(&workers->order_lock);
} else {
INIT_LIST_HEAD(&work->order_list);
}
@@ -481,7 +632,6 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
list_add_tail(&work->list, &worker->prio_pending);
else
list_add_tail(&work->list, &worker->pending);
- atomic_inc(&worker->num_pending);
check_busy_worker(worker);
/*
@@ -492,10 +642,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
wake = 1;
worker->working = 1;
- spin_unlock_irqrestore(&worker->lock, flags);
-
if (wake)
wake_up_process(worker->task);
+ spin_unlock_irqrestore(&worker->lock, flags);
+
out:
return 0;
}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 1b511c109db..fc089b95ec1 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -73,6 +73,15 @@ struct btrfs_workers {
/* force completions in the order they were queued */
int ordered;
+ /* more workers required, but in an interrupt handler */
+ int atomic_start_pending;
+
+ /*
+ * are we allowed to sleep while starting workers or are we required
+ * to start them at a later time?
+ */
+ int atomic_worker_start;
+
/* list with all the work threads. The workers on the idle thread
* may be actively servicing jobs, but they haven't yet hit the
* idle thresh limit above.
@@ -90,6 +99,9 @@ struct btrfs_workers {
/* lock for finding the next worker thread to queue on */
spinlock_t lock;
+ /* lock for the ordered lists */
+ spinlock_t order_lock;
+
/* extra name for this worker, used for current->name */
char *name;
};
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ea1ea0af8c0..82ee56bba29 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -138,6 +138,7 @@ struct btrfs_inode {
* of these.
*/
unsigned ordered_data_close:1;
+ unsigned dummy_inode:1;
struct inode vfs_inode;
};
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 9d8ba4d54a3..a11a32058b5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
*/
set_page_extent_mapped(page);
lock_extent(tree, last_offset, end, GFP_NOFS);
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, last_offset,
PAGE_CACHE_SIZE);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (!em || last_offset < em->start ||
(last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
@@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
em_tree = &BTRFS_I(inode)->extent_tree;
/* we need the actual starting offset of this extent in the file */
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree,
page_offset(bio->bi_io_vec->bv_page),
PAGE_CACHE_SIZE);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3fdcc0512d3..ec96f3a6d53 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2853,6 +2853,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
int split;
int num_doubles = 0;
+ l = path->nodes[0];
+ slot = path->slots[0];
+ if (extend && data_size + btrfs_item_size_nr(l, slot) +
+ sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
+ return -EOVERFLOW;
+
/* first try to make some room by pushing left and right */
if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
wret = push_leaf_right(trans, root, path, data_size, 0);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 837435ce84c..80599b4e42b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -114,6 +114,10 @@ struct btrfs_ordered_sum;
*/
#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+#define BTRFS_BTREE_INODE_OBJECTID 1
+
+#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
+
/*
* we can actually store much bigger names, but lets not confuse the rest
* of linux
@@ -670,6 +674,7 @@ struct btrfs_space_info {
u64 bytes_reserved; /* total bytes the allocator has reserved for
current allocations */
u64 bytes_readonly; /* total bytes that are read only */
+ u64 bytes_super; /* total bytes reserved for the super blocks */
/* delalloc accounting */
u64 bytes_delalloc; /* number of bytes reserved for allocation,
@@ -726,6 +731,15 @@ enum btrfs_caching_type {
BTRFS_CACHE_FINISHED = 2,
};
+struct btrfs_caching_control {
+ struct list_head list;
+ struct mutex mutex;
+ wait_queue_head_t wait;
+ struct btrfs_block_group_cache *block_group;
+ u64 progress;
+ atomic_t count;
+};
+
struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
@@ -733,6 +747,7 @@ struct btrfs_block_group_cache {
spinlock_t lock;
u64 pinned;
u64 reserved;
+ u64 bytes_super;
u64 flags;
u64 sectorsize;
int extents_thresh;
@@ -742,8 +757,9 @@ struct btrfs_block_group_cache {
int dirty;
/* cache tracking stuff */
- wait_queue_head_t caching_q;
int cached;
+ struct btrfs_caching_control *caching_ctl;
+ u64 last_byte_to_unpin;
struct btrfs_space_info *space_info;
@@ -782,13 +798,16 @@ struct btrfs_fs_info {
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
+
+ spinlock_t fs_roots_radix_lock;
struct radix_tree_root fs_roots_radix;
/* block group cache stuff */
spinlock_t block_group_cache_lock;
struct rb_root block_group_cache_tree;
- struct extent_io_tree pinned_extents;
+ struct extent_io_tree freed_extents[2];
+ struct extent_io_tree *pinned_extents;
/* logical->physical extent mapping */
struct btrfs_mapping_tree mapping_tree;
@@ -822,11 +841,7 @@ struct btrfs_fs_info {
struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex;
struct mutex chunk_mutex;
- struct mutex drop_mutex;
struct mutex volume_mutex;
- struct mutex tree_reloc_mutex;
- struct rw_semaphore extent_commit_sem;
-
/*
* this protects the ordered operations list only while we are
* processing all of the entries on it. This way we make
@@ -835,10 +850,16 @@ struct btrfs_fs_info {
* before jumping into the main commit.
*/
struct mutex ordered_operations_mutex;
+ struct rw_semaphore extent_commit_sem;
+
+ struct rw_semaphore subvol_sem;
+
+ struct srcu_struct subvol_srcu;
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
+ struct list_head caching_block_groups;
atomic_t nr_async_submits;
atomic_t async_submit_draining;
@@ -996,10 +1017,12 @@ struct btrfs_root {
u32 stripesize;
u32 type;
- u64 highest_inode;
- u64 last_inode_alloc;
+
+ u64 highest_objectid;
int ref_cows;
int track_dirty;
+ int in_radix;
+
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
@@ -1920,8 +1943,8 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_update_pinned_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int pin);
+int btrfs_pin_extent(struct btrfs_root *root,
+ u64 bytenr, u64 num, int reserved);
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *leaf);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
@@ -1971,9 +1994,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 root_objectid, u64 owner, u64 offset);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_io_tree *unpin);
+ struct btrfs_root *root);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -1984,6 +2008,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_root *root);
+int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr);
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytes_used,
u64 type, u64 chunk_objectid, u64 chunk_offset,
@@ -2006,7 +2031,6 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
-void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -2100,12 +2124,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct extent_buffer *parent);
/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
- struct btrfs_path *path,
- u64 root_id, u64 ref_id);
+ struct btrfs_path *path,
+ u64 root_id, u64 ref_id);
int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
- u64 root_id, u8 type, u64 ref_id,
- u64 dirid, u64 sequence,
+ u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
+ const char *name, int name_len);
+int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *tree_root,
+ u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
const char *name, int name_len);
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_key *key);
@@ -2120,6 +2147,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
int btrfs_search_root(struct btrfs_root *root, u64 search_start,
u64 *found_objectid);
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
+int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
int btrfs_set_root_node(struct btrfs_root_item *item,
struct extent_buffer *node);
/* dir-item.c */
@@ -2138,6 +2166,10 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path, u64 dir,
u64 objectid, const char *name, int name_len,
int mod);
+struct btrfs_dir_item *
+btrfs_search_dir_index_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 dirid,
+ const char *name, int name_len);
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len);
@@ -2160,6 +2192,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset);
int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset);
+int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
/* inode-map.c */
int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
@@ -2232,6 +2265,10 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct inode *parent_inode, struct inode *inode,
const char *name, int name_len, int add_backref, u64 index);
+int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *dir, u64 objectid,
+ const char *name, int name_len);
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode, u64 new_size,
@@ -2242,7 +2279,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *new_root, struct dentry *dentry,
+ struct btrfs_root *new_root,
u64 new_dirid, u64 alloc_hint);
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio, unsigned long bio_flags);
@@ -2258,6 +2295,7 @@ int btrfs_write_inode(struct inode *inode, int wait);
void btrfs_dirty_inode(struct inode *inode);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
+void btrfs_drop_inode(struct inode *inode);
int btrfs_init_cachep(void);
void btrfs_destroy_cachep(void);
long btrfs_ioctl_trans_end(struct file *file);
@@ -2275,6 +2313,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
void btrfs_orphan_cleanup(struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t size);
+int btrfs_invalidate_inodes(struct btrfs_root *root);
+extern struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -2290,7 +2330,7 @@ extern struct file_operations btrfs_file_operations;
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
u64 start, u64 end, u64 locked_end,
- u64 inline_limit, u64 *hint_block);
+ u64 inline_limit, u64 *hint_block, int drop_cache);
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 1d70236ba00..f3a6075519c 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -281,6 +281,53 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
return btrfs_match_dir_item_name(root, path, name, name_len);
}
+/*
+ * Find the dir index item of directory @dirid whose name matches
+ * @name/@name_len when the index number is not known.
+ *
+ * The search starts at key (dirid, BTRFS_DIR_INDEX_KEY, 0) and walks
+ * forward one item at a time, moving on with btrfs_next_leaf() when a
+ * leaf is exhausted, until a key with another objectid or type shows up.
+ *
+ * Returns the item reported by btrfs_match_dir_item_name() (the path is
+ * not released by this function), NULL when nothing matched, or an
+ * ERR_PTR() holding a negative return of btrfs_search_slot() or
+ * btrfs_next_leaf().
+ */
+struct btrfs_dir_item *
+btrfs_search_dir_index_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 dirid,
+ const char *name, int name_len)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_dir_item *di;
+ struct btrfs_key key;
+ u32 nritems;
+ int ret;
+
+ /* lowest possible index key of this directory */
+ key.objectid = dirid;
+ key.type = BTRFS_DIR_INDEX_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+
+ while (1) {
+ /* past the last item of this leaf: continue in the next one */
+ if (path->slots[0] >= nritems) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ /* a positive return ends the scan without a match */
+ if (ret > 0)
+ break;
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ continue;
+ }
+
+ /* stop once the key no longer is an index item of @dirid */
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY)
+ break;
+
+ di = btrfs_match_dir_item_name(root, path, name, name_len);
+ if (di)
+ return di;
+
+ path->slots[0]++;
+ }
+ return NULL;
+}
+
struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6c4173146bb..644e796fd64 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -41,6 +41,7 @@
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
+static void free_fs_root(struct btrfs_root *root);
static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
@@ -123,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode,
struct extent_map *em;
int ret;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em) {
em->bdev =
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
goto out;
}
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
em = alloc_extent_map(GFP_NOFS);
if (!em) {
@@ -144,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
em->block_start = 0;
em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
if (ret == -EEXIST) {
u64 failed_start = em->start;
@@ -163,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
free_extent_map(em);
em = NULL;
}
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret)
em = ERR_PTR(ret);
@@ -895,8 +896,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->fs_info = fs_info;
root->objectid = objectid;
root->last_trans = 0;
- root->highest_inode = 0;
- root->last_inode_alloc = 0;
+ root->highest_objectid = 0;
root->name = NULL;
root->in_sysfs = 0;
root->inode_tree.rb_node = NULL;
@@ -952,14 +952,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
root, fs_info, objectid);
ret = btrfs_find_last_root(tree_root, objectid,
&root->root_item, &root->root_key);
+ if (ret > 0)
+ return -ENOENT;
BUG_ON(ret);
generation = btrfs_root_generation(&root->root_item);
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
- root->commit_root = btrfs_root_node(root);
BUG_ON(!root->node);
+ root->commit_root = btrfs_root_node(root);
return 0;
}
@@ -1095,7 +1097,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
struct btrfs_fs_info *fs_info = tree_root->fs_info;
struct btrfs_path *path;
struct extent_buffer *l;
- u64 highest_inode;
u64 generation;
u32 blocksize;
int ret = 0;
@@ -1110,7 +1111,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
kfree(root);
return ERR_PTR(ret);
}
- goto insert;
+ goto out;
}
__setup_root(tree_root->nodesize, tree_root->leafsize,
@@ -1120,39 +1121,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
path = btrfs_alloc_path();
BUG_ON(!path);
ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
- if (ret != 0) {
- if (ret > 0)
- ret = -ENOENT;
- goto out;
+ if (ret == 0) {
+ l = path->nodes[0];
+ read_extent_buffer(l, &root->root_item,
+ btrfs_item_ptr_offset(l, path->slots[0]),
+ sizeof(root->root_item));
+ memcpy(&root->root_key, location, sizeof(*location));
}
- l = path->nodes[0];
- read_extent_buffer(l, &root->root_item,
- btrfs_item_ptr_offset(l, path->slots[0]),
- sizeof(root->root_item));
- memcpy(&root->root_key, location, sizeof(*location));
- ret = 0;
-out:
- btrfs_release_path(root, path);
btrfs_free_path(path);
if (ret) {
- kfree(root);
+ if (ret > 0)
+ ret = -ENOENT;
return ERR_PTR(ret);
}
+
generation = btrfs_root_generation(&root->root_item);
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
root->commit_root = btrfs_root_node(root);
BUG_ON(!root->node);
-insert:
- if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
+out:
+ if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
root->ref_cows = 1;
- ret = btrfs_find_highest_inode(root, &highest_inode);
- if (ret == 0) {
- root->highest_inode = highest_inode;
- root->last_inode_alloc = highest_inode;
- }
- }
+
return root;
}
@@ -1187,39 +1179,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
return fs_info->dev_root;
if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
return fs_info->csum_root;
-
+again:
+ spin_lock(&fs_info->fs_roots_radix_lock);
root = radix_tree_lookup(&fs_info->fs_roots_radix,
(unsigned long)location->objectid);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
if (root)
return root;
+ ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
+ if (ret == 0)
+ ret = -ENOENT;
+ if (ret < 0)
+ return ERR_PTR(ret);
+
root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
if (IS_ERR(root))
return root;
+ WARN_ON(btrfs_root_refs(&root->root_item) == 0);
set_anon_super(&root->anon_super, NULL);
+ ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+ if (ret)
+ goto fail;
+
+ spin_lock(&fs_info->fs_roots_radix_lock);
ret = radix_tree_insert(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
root);
+ if (ret == 0)
+ root->in_radix = 1;
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+ radix_tree_preload_end();
if (ret) {
- free_extent_buffer(root->node);
- kfree(root);
- return ERR_PTR(ret);
+ if (ret == -EEXIST) {
+ free_fs_root(root);
+ goto again;
+ }
+ goto fail;
}
- if (!(fs_info->sb->s_flags & MS_RDONLY)) {
- ret = btrfs_find_dead_roots(fs_info->tree_root,
- root->root_key.objectid);
- BUG_ON(ret);
+
+ ret = btrfs_find_dead_roots(fs_info->tree_root,
+ root->root_key.objectid);
+ WARN_ON(ret);
+
+ if (!(fs_info->sb->s_flags & MS_RDONLY))
btrfs_orphan_cleanup(root);
- }
+
return root;
+fail:
+ free_fs_root(root);
+ return ERR_PTR(ret);
}
struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *location,
const char *name, int namelen)
{
+ return btrfs_read_fs_root_no_name(fs_info, location);
+#if 0
struct btrfs_root *root;
int ret;
@@ -1236,7 +1255,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
kfree(root);
return ERR_PTR(ret);
}
-#if 0
+
ret = btrfs_sysfs_add_root(root);
if (ret) {
free_extent_buffer(root->node);
@@ -1244,9 +1263,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
kfree(root);
return ERR_PTR(ret);
}
-#endif
root->in_sysfs = 1;
return root;
+#endif
}
static int btrfs_congested_fn(void *congested_data, int bdi_bits)
@@ -1325,9 +1344,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
offset = page_offset(page);
em_tree = &BTRFS_I(inode)->extent_tree;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (!em) {
__unplug_io_fn(bdi, page);
return;
@@ -1360,8 +1379,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
err = bdi_register(bdi, NULL, "btrfs-%d",
atomic_inc_return(&btrfs_bdi_num));
- if (err)
+ if (err) {
+ bdi_destroy(bdi);
return err;
+ }
bdi->ra_pages = default_backing_dev_info.ra_pages;
bdi->unplug_io_fn = btrfs_unplug_io_fn;
@@ -1451,9 +1472,12 @@ static int cleaner_kthread(void *arg)
break;
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
- mutex_lock(&root->fs_info->cleaner_mutex);
- btrfs_clean_old_snapshots(root);
- mutex_unlock(&root->fs_info->cleaner_mutex);
+
+ if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
+ mutex_trylock(&root->fs_info->cleaner_mutex)) {
+ btrfs_clean_old_snapshots(root);
+ mutex_unlock(&root->fs_info->cleaner_mutex);
+ }
if (freezing(current)) {
refrigerator();
@@ -1558,15 +1582,36 @@ struct btrfs_root *open_ctree(struct super_block *sb,
err = -ENOMEM;
goto fail;
}
- INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
+
+ ret = init_srcu_struct(&fs_info->subvol_srcu);
+ if (ret) {
+ err = ret;
+ goto fail;
+ }
+
+ ret = setup_bdi(fs_info, &fs_info->bdi);
+ if (ret) {
+ err = ret;
+ goto fail_srcu;
+ }
+
+ fs_info->btree_inode = new_inode(sb);
+ if (!fs_info->btree_inode) {
+ err = -ENOMEM;
+ goto fail_bdi;
+ }
+
+ INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->hashers);
INIT_LIST_HEAD(&fs_info->delalloc_inodes);
INIT_LIST_HEAD(&fs_info->ordered_operations);
+ INIT_LIST_HEAD(&fs_info->caching_block_groups);
spin_lock_init(&fs_info->delalloc_lock);
spin_lock_init(&fs_info->new_trans_lock);
spin_lock_init(&fs_info->ref_cache_lock);
+ spin_lock_init(&fs_info->fs_roots_radix_lock);
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;
@@ -1585,11 +1630,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->sb = sb;
fs_info->max_extent = (u64)-1;
fs_info->max_inline = 8192 * 1024;
- if (setup_bdi(fs_info, &fs_info->bdi))
- goto fail_bdi;
- fs_info->btree_inode = new_inode(sb);
- fs_info->btree_inode->i_ino = 1;
- fs_info->btree_inode->i_nlink = 1;
fs_info->metadata_ratio = 8;
fs_info->thread_pool_size = min_t(unsigned long,
@@ -1602,6 +1642,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
sb->s_blocksize_bits = blksize_bits(4096);
sb->s_bdi = &fs_info->bdi;
+ fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
+ fs_info->btree_inode->i_nlink = 1;
/*
* we set the i_size on the btree inode to the max possible int.
* the real end of the address space is determined by all of
@@ -1620,28 +1662,32 @@ struct btrfs_root *open_ctree(struct super_block *sb,
BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
+ BTRFS_I(fs_info->btree_inode)->root = tree_root;
+ memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
+ sizeof(struct btrfs_key));
+ BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
+ insert_inode_hash(fs_info->btree_inode);
+
spin_lock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree.rb_node = NULL;
- extent_io_tree_init(&fs_info->pinned_extents,
+ extent_io_tree_init(&fs_info->freed_extents[0],
fs_info->btree_inode->i_mapping, GFP_NOFS);
+ extent_io_tree_init(&fs_info->freed_extents[1],
+ fs_info->btree_inode->i_mapping, GFP_NOFS);
+ fs_info->pinned_extents = &fs_info->freed_extents[0];
fs_info->do_barriers = 1;
- BTRFS_I(fs_info->btree_inode)->root = tree_root;
- memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
- sizeof(struct btrfs_key));
- insert_inode_hash(fs_info->btree_inode);
mutex_init(&fs_info->trans_mutex);
mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->tree_log_mutex);
- mutex_init(&fs_info->drop_mutex);
mutex_init(&fs_info->chunk_mutex);
mutex_init(&fs_info->transaction_kthread_mutex);
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
- mutex_init(&fs_info->tree_reloc_mutex);
init_rwsem(&fs_info->extent_commit_sem);
+ init_rwsem(&fs_info->subvol_sem);
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -1700,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
err = -EINVAL;
goto fail_iput;
}
-
+printk("thread pool is %d\n", fs_info->thread_pool_size);
/*
* we need to start all the end_io workers up front because the
* queue work function gets called at interrupt time, and so it
@@ -1745,20 +1791,22 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->endio_workers.idle_thresh = 4;
fs_info->endio_meta_workers.idle_thresh = 4;
- fs_info->endio_write_workers.idle_thresh = 64;
- fs_info->endio_meta_write_workers.idle_thresh = 64;
+ fs_info->endio_write_workers.idle_thresh = 2;
+ fs_info->endio_meta_write_workers.idle_thresh = 2;
+
+ fs_info->endio_workers.atomic_worker_start = 1;
+ fs_info->endio_meta_workers.atomic_worker_start = 1;
+ fs_info->endio_write_workers.atomic_worker_start = 1;
+ fs_info->endio_meta_write_workers.atomic_worker_start = 1;
btrfs_start_workers(&fs_info->workers, 1);
btrfs_start_workers(&fs_info->submit_workers, 1);
btrfs_start_workers(&fs_info->delalloc_workers, 1);
btrfs_start_workers(&fs_info->fixup_workers, 1);
- btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_meta_workers,
- fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_meta_write_workers,
- fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_write_workers,
- fs_info->thread_pool_size);
+ btrfs_start_workers(&fs_info->endio_workers, 1);
+ btrfs_start_workers(&fs_info->endio_meta_workers, 1);
+ btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
+ btrfs_start_workers(&fs_info->endio_write_workers, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1918,6 +1966,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
}
}
+ ret = btrfs_find_orphan_roots(tree_root);
+ BUG_ON(ret);
+
if (!(sb->s_flags & MS_RDONLY)) {
ret = btrfs_recover_relocation(tree_root);
BUG_ON(ret);
@@ -1977,6 +2028,8 @@ fail_iput:
btrfs_mapping_tree_free(&fs_info->mapping_tree);
fail_bdi:
bdi_destroy(&fs_info->bdi);
+fail_srcu:
+ cleanup_srcu_struct(&fs_info->subvol_srcu);
fail:
kfree(extent_root);
kfree(tree_root);
@@ -2236,20 +2289,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
+/*
+ * Remove @root from fs_info->fs_roots_radix (under fs_roots_radix_lock)
+ * and free it with free_fs_root().  When the root item has zero refs,
+ * synchronize_srcu() on subvol_srcu is called before the free.
+ * Always returns 0.
+ */
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
- WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+ spin_lock(&fs_info->fs_roots_radix_lock);
radix_tree_delete(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+
+ /*
+ * btrfs_get_dentry() looks roots up inside an srcu_read_lock() section
+ * on subvol_srcu and rejects roots with zero refs.
+ */
+ if (btrfs_root_refs(&root->root_item) == 0)
+ synchronize_srcu(&fs_info->subvol_srcu);
+
+ free_fs_root(root);
+ return 0;
+}
+
+/*
+ * Release what hangs off @root and free the structure itself: the
+ * anonymous super (only when anon_super.s_dev is set), the node and
+ * commit_root extent buffers, the name and finally @root.  Warns when
+ * root->inode_tree is not empty.
+ */
+static void free_fs_root(struct btrfs_root *root)
+{
+ WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
if (root->anon_super.s_dev) {
down_write(&root->anon_super.s_umount);
kill_anon_super(&root->anon_super);
}
- if (root->node)
- free_extent_buffer(root->node);
- if (root->commit_root)
- free_extent_buffer(root->commit_root);
+ /*
+ * NOTE(review): the NULL checks are dropped here; this presumably relies
+ * on free_extent_buffer() accepting NULL -- confirm.
+ */
+ free_extent_buffer(root->node);
+ free_extent_buffer(root->commit_root);
kfree(root->name);
kfree(root);
- return 0;
}
static int del_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2258,6 +2320,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info)
struct btrfs_root *gang[8];
int i;
+ while (!list_empty(&fs_info->dead_roots)) {
+ gang[0] = list_entry(fs_info->dead_roots.next,
+ struct btrfs_root, root_list);
+ list_del(&gang[0]->root_list);
+
+ if (gang[0]->in_radix) {
+ btrfs_free_fs_root(fs_info, gang[0]);
+ } else {
+ free_extent_buffer(gang[0]->node);
+ free_extent_buffer(gang[0]->commit_root);
+ kfree(gang[0]);
+ }
+ }
+
while (1) {
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)gang, 0,
@@ -2287,9 +2363,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
root_objectid = gang[ret - 1]->root_key.objectid + 1;
for (i = 0; i < ret; i++) {
root_objectid = gang[i]->root_key.objectid;
- ret = btrfs_find_dead_roots(fs_info->tree_root,
- root_objectid);
- BUG_ON(ret);
btrfs_orphan_cleanup(gang[i]);
}
root_objectid++;
@@ -2359,7 +2432,6 @@ int close_ctree(struct btrfs_root *root)
free_extent_buffer(root->fs_info->csum_root->commit_root);
btrfs_free_block_groups(root->fs_info);
- btrfs_free_pinned_extents(root->fs_info);
del_fs_roots(fs_info);
@@ -2378,6 +2450,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_mapping_tree_free(&fs_info->mapping_tree);
bdi_destroy(&fs_info->bdi);
+ cleanup_srcu_struct(&fs_info->subvol_srcu);
kfree(fs_info->extent_root);
kfree(fs_info->tree_root);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 9596b40caa4..ba5c3fd5ab8 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -28,7 +28,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
- fid->objectid = BTRFS_I(inode)->location.objectid;
+ fid->objectid = inode->i_ino;
fid->root_objectid = BTRFS_I(inode)->root->objectid;
fid->gen = inode->i_generation;
@@ -60,34 +60,61 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
}
+/*
+ * Build a dentry for inode @objectid of the root @root_objectid.
+ *
+ * Object ids below BTRFS_FIRST_FREE_OBJECTID are rejected with -ESTALE.
+ * The root lookup and btrfs_iget() run inside an SRCU read-side section
+ * on fs_info->subvol_srcu; a root whose root item has zero refs yields
+ * -ENOENT.  When @check_generation is non-zero, an inode whose
+ * i_generation differs from @generation yields -ESTALE.
+ *
+ * Returns the dentry from d_obtain_alias(), with d_op set to
+ * btrfs_dentry_operations on success, or an ERR_PTR().
+ */
static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
- u64 root_objectid, u32 generation)
+ u64 root_objectid, u32 generation,
+ int check_generation)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info;
struct btrfs_root *root;
+ struct dentry *dentry;
struct inode *inode;
struct btrfs_key key;
+ int index;
+ int err = 0;
+
+ if (objectid < BTRFS_FIRST_FREE_OBJECTID)
+ return ERR_PTR(-ESTALE);
key.objectid = root_objectid;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
key.offset = (u64)-1;
- root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key);
- if (IS_ERR(root))
- return ERR_CAST(root);
+ /* every exit below this point must drop the SRCU read lock */
+ index = srcu_read_lock(&fs_info->subvol_srcu);
+
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto fail;
+ }
+
+ if (btrfs_root_refs(&root->root_item) == 0) {
+ err = -ENOENT;
+ goto fail;
+ }
key.objectid = objectid;
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
key.offset = 0;
inode = btrfs_iget(sb, &key, root);
- if (IS_ERR(inode))
- return (void *)inode;
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto fail;
+ }
+
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
- if (generation != inode->i_generation) {
+ if (check_generation && generation != inode->i_generation) {
iput(inode);
return ERR_PTR(-ESTALE);
}
- return d_obtain_alias(inode);
+ dentry = d_obtain_alias(inode);
+ if (!IS_ERR(dentry))
+ dentry->d_op = &btrfs_dentry_operations;
+ return dentry;
+fail:
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ return ERR_PTR(err);
}
static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
@@ -111,7 +138,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
objectid = fid->parent_objectid;
generation = fid->parent_gen;
- return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+ return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
@@ -133,66 +160,76 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
root_objectid = fid->root_objectid;
generation = fid->gen;
- return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+ return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
+/*
+ * Return a dentry for the parent directory of @child.
+ *
+ * For the top directory of a subvolume (i_ino == BTRFS_FIRST_FREE_OBJECTID)
+ * the parent is taken from the last BTRFS_ROOT_BACKREF_KEY item of this
+ * root in the tree root: the ref's dirid is the parent directory and the
+ * key offset is the root passed to btrfs_get_dentry().  For any other
+ * directory the offset of its last BTRFS_INODE_REF_KEY item is the parent
+ * inode number in the same root.
+ *
+ * Returns the dentry or an ERR_PTR(): -ENOMEM when no path could be
+ * allocated, -ENOENT when no matching item exists, or whatever
+ * btrfs_search_slot(), btrfs_get_dentry() or d_obtain_alias() report.
+ */
static struct dentry *btrfs_get_parent(struct dentry *child)
{
struct inode *dir = child->d_inode;
+ struct dentry *dentry;
struct btrfs_root *root = BTRFS_I(dir)->root;
- struct btrfs_key key;
struct btrfs_path *path;
struct extent_buffer *leaf;
- int slot;
- u64 objectid;
+ struct btrfs_root_ref *ref;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
int ret;
path = btrfs_alloc_path();
+ if (!path)
+ return ERR_PTR(-ENOMEM);
- key.objectid = dir->i_ino;
- btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
- key.offset = (u64)-1;
+ if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ /* subvolume top directory: search the root backrefs in the tree root */
+ key.objectid = root->root_key.objectid;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = (u64)-1;
+ root = root->fs_info->tree_root;
+ } else {
+ key.objectid = dir->i_ino;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = (u64)-1;
+ }
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
- /* Error */
- btrfs_free_path(path);
- return ERR_PTR(ret);
+ if (ret < 0)
+ goto fail;
+
+ /* an item with offset (u64)-1 is not expected to exist */
+ BUG_ON(ret == 0);
+ if (path->slots[0] == 0) {
+ ret = -ENOENT;
+ goto fail;
}
+
+ /* the wanted item, if any, sits right before the insertion slot */
+ path->slots[0]--;
leaf = path->nodes[0];
- slot = path->slots[0];
- if (ret) {
- /* btrfs_search_slot() returns the slot where we'd want to
- insert a backref for parent inode #0xFFFFFFFFFFFFFFFF.
- The _real_ backref, telling us what the parent inode
- _actually_ is, will be in the slot _before_ the one
- that btrfs_search_slot() returns. */
- if (!slot) {
- /* Unless there is _no_ key in the tree before... */
- btrfs_free_path(path);
- return ERR_PTR(-EIO);
- }
- slot--;
+
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid != key.objectid || found_key.type != key.type) {
+ ret = -ENOENT;
+ goto fail;
}
- btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
+ ref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_root_ref);
+ key.objectid = btrfs_root_ref_dirid(leaf, ref);
+ } else {
+ key.objectid = found_key.offset;
+ }
btrfs_free_path(path);
- if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
- return ERR_PTR(-EINVAL);
-
- objectid = key.offset;
-
- /* If we are already at the root of a subvol, return the real root */
- if (objectid == dir->i_ino)
- return dget(dir->i_sb->s_root);
+ if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
+ return btrfs_get_dentry(root->fs_info->sb, key.objectid,
+ found_key.offset, 0, 0);
+ }
- /* Build a new key for the inode item */
- key.objectid = objectid;
- btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+ key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
-
- return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
+ dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
+ if (!IS_ERR(dentry))
+ dentry->d_op = &btrfs_dentry_operations;
+ return dentry;
+fail:
+ btrfs_free_path(path);
+ return ERR_PTR(ret);
}
const struct export_operations btrfs_export_ops = {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 535f85ba104..993f93ff7ba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -32,12 +32,12 @@
#include "locking.h"
#include "free-space-cache.h"
-static int update_reserved_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -57,10 +57,17 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
u64 flags, struct btrfs_disk_key *key,
int level, struct btrfs_key *ins);
-
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);
+static int pin_down_bytes(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 bytenr, u64 num_bytes,
+ int is_data, int reserved,
+ struct extent_buffer **must_clean);
+static int find_next_key(struct btrfs_path *path, int level,
+ struct btrfs_key *key);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -153,34 +160,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
return ret;
}
-/*
- * We always set EXTENT_LOCKED for the super mirror extents so we don't
- * overwrite them, so those bits need to be unset. Also, if we are unmounting
- * with pinned extents still sitting there because we had a block group caching,
- * we need to clear those now, since we are done.
- */
-void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
+/*
+ * Set EXTENT_UPTODATE on [start, start + num_bytes - 1] in both
+ * fs_info->freed_extents trees.  Always returns 0; the results of
+ * set_extent_bits() are not looked at.
+ */
+static int add_excluded_extent(struct btrfs_root *root,
+ u64 start, u64 num_bytes)
{
- u64 start, end, last = 0;
- int ret;
+ u64 end = start + num_bytes - 1;
+ set_extent_bits(&root->fs_info->freed_extents[0],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ set_extent_bits(&root->fs_info->freed_extents[1],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ return 0;
+}
- while (1) {
- ret = find_first_extent_bit(&info->pinned_extents, last,
- &start, &end,
- EXTENT_LOCKED|EXTENT_DIRTY);
- if (ret)
- break;
+/*
+ * Clear EXTENT_UPTODATE over the whole range of block group @cache
+ * (key.objectid up to key.objectid + key.offset - 1) in both
+ * fs_info->freed_extents trees.
+ */
+static void free_excluded_extents(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ u64 start, end;
- clear_extent_bits(&info->pinned_extents, start, end,
- EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
- last = end+1;
- }
+ start = cache->key.objectid;
+ end = start + cache->key.offset - 1;
+
+ clear_extent_bits(&root->fs_info->freed_extents[0],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ clear_extent_bits(&root->fs_info->freed_extents[1],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
}
-static int remove_sb_from_cache(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache)
+static int exclude_super_stripes(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
u64 bytenr;
u64 *logical;
int stripe_len;
@@ -192,17 +199,42 @@ static int remove_sb_from_cache(struct btrfs_root *root,
cache->key.objectid, bytenr,
0, &logical, &nr, &stripe_len);
BUG_ON(ret);
+
while (nr--) {
- try_lock_extent(&fs_info->pinned_extents,
- logical[nr],
- logical[nr] + stripe_len - 1, GFP_NOFS);
+ cache->bytes_super += stripe_len;
+ ret = add_excluded_extent(root, logical[nr],
+ stripe_len);
+ BUG_ON(ret);
}
+
kfree(logical);
}
-
return 0;
}
+/*
+ * Take a reference on the caching control of @cache.
+ *
+ * Under cache->lock: returns NULL unless cache->cached is
+ * BTRFS_CACHE_STARTED; otherwise increments ctl->count and returns
+ * cache->caching_ctl.  The reference is dropped again with
+ * put_caching_control().
+ */
+static struct btrfs_caching_control *
+get_caching_control(struct btrfs_block_group_cache *cache)
+{
+ struct btrfs_caching_control *ctl;
+
+ spin_lock(&cache->lock);
+ if (cache->cached != BTRFS_CACHE_STARTED) {
+ spin_unlock(&cache->lock);
+ return NULL;
+ }
+
+ /*
+ * caching_ctl is assigned together with BTRFS_CACHE_STARTED and reset
+ * together with the switch to BTRFS_CACHE_FINISHED, both under
+ * cache->lock, so it is dereferenced here without a NULL check.
+ */
+ ctl = cache->caching_ctl;
+ atomic_inc(&ctl->count);
+ spin_unlock(&cache->lock);
+ return ctl;
+}
+
+/*
+ * Drop one reference on @ctl; the structure is released with kfree()
+ * once the count reaches zero.
+ */
+static void put_caching_control(struct btrfs_caching_control *ctl)
+{
+ if (atomic_dec_and_test(&ctl->count))
+ kfree(ctl);
+}
+
/*
* this is only called by cache_block_group, since we could have freed extents
* we need to check the pinned_extents for any extents that can't be used yet
@@ -215,9 +247,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
int ret;
while (start < end) {
- ret = find_first_extent_bit(&info->pinned_extents, start,
+ ret = find_first_extent_bit(info->pinned_extents, start,
&extent_start, &extent_end,
- EXTENT_DIRTY|EXTENT_LOCKED);
+ EXTENT_DIRTY | EXTENT_UPTODATE);
if (ret)
break;
@@ -249,22 +281,27 @@ static int caching_kthread(void *data)
{
struct btrfs_block_group_cache *block_group = data;
struct btrfs_fs_info *fs_info = block_group->fs_info;
- u64 last = 0;
+ struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
+ struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_path *path;
- int ret = 0;
- struct btrfs_key key;
struct extent_buffer *leaf;
- int slot;
+ struct btrfs_key key;
u64 total_found = 0;
-
- BUG_ON(!fs_info);
+ u64 last = 0;
+ u32 nritems;
+ int ret = 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- atomic_inc(&block_group->space_info->caching_threads);
+ exclude_super_stripes(extent_root, block_group);
+ spin_lock(&block_group->space_info->lock);
+ block_group->space_info->bytes_super += block_group->bytes_super;
+ spin_unlock(&block_group->space_info->lock);
+
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+
/*
* We don't want to deadlock with somebody trying to allocate a new
* extent for the extent root while also trying to search the extent
@@ -277,74 +314,64 @@ static int caching_kthread(void *data)
key.objectid = last;
key.offset = 0;
- btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+ key.type = BTRFS_EXTENT_ITEM_KEY;
again:
+ mutex_lock(&caching_ctl->mutex);
/* need to make sure the commit_root doesn't disappear */
down_read(&fs_info->extent_commit_sem);
- ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
goto err;
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+
while (1) {
smp_mb();
- if (block_group->fs_info->closing > 1) {
+ if (fs_info->closing > 1) {
last = (u64)-1;
break;
}
- leaf = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(fs_info->extent_root, path);
- if (ret < 0)
- goto err;
- else if (ret)
+ if (path->slots[0] < nritems) {
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ } else {
+ ret = find_next_key(path, 0, &key);
+ if (ret)
break;
- if (need_resched() ||
- btrfs_transaction_in_commit(fs_info)) {
- leaf = path->nodes[0];
-
- /* this shouldn't happen, but if the
- * leaf is empty just move on.
- */
- if (btrfs_header_nritems(leaf) == 0)
- break;
- /*
- * we need to copy the key out so that
- * we are sure the next search advances
- * us forward in the btree.
- */
- btrfs_item_key_to_cpu(leaf, &key, 0);
- btrfs_release_path(fs_info->extent_root, path);
- up_read(&fs_info->extent_commit_sem);
+ caching_ctl->progress = last;
+ btrfs_release_path(extent_root, path);
+ up_read(&fs_info->extent_commit_sem);
+ mutex_unlock(&caching_ctl->mutex);
+ if (btrfs_transaction_in_commit(fs_info))
schedule_timeout(1);
- goto again;
- }
+ else
+ cond_resched();
+ goto again;
+ }
+ if (key.objectid < block_group->key.objectid) {
+ path->slots[0]++;
continue;
}
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid < block_group->key.objectid)
- goto next;
if (key.objectid >= block_group->key.objectid +
block_group->key.offset)
break;
- if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
+ if (key.type == BTRFS_EXTENT_ITEM_KEY) {
total_found += add_new_free_space(block_group,
fs_info, last,
key.objectid);
last = key.objectid + key.offset;
- }
- if (total_found > (1024 * 1024 * 2)) {
- total_found = 0;
- wake_up(&block_group->caching_q);
+ if (total_found > (1024 * 1024 * 2)) {
+ total_found = 0;
+ wake_up(&caching_ctl->wait);
+ }
}
-next:
path->slots[0]++;
}
ret = 0;
@@ -352,33 +379,65 @@ next:
total_found += add_new_free_space(block_group, fs_info, last,
block_group->key.objectid +
block_group->key.offset);
+ caching_ctl->progress = (u64)-1;
spin_lock(&block_group->lock);
+ block_group->caching_ctl = NULL;
block_group->cached = BTRFS_CACHE_FINISHED;
spin_unlock(&block_group->lock);
err:
btrfs_free_path(path);
up_read(&fs_info->extent_commit_sem);
- atomic_dec(&block_group->space_info->caching_threads);
- wake_up(&block_group->caching_q);
+ free_excluded_extents(extent_root, block_group);
+
+ mutex_unlock(&caching_ctl->mutex);
+ wake_up(&caching_ctl->wait);
+
+ put_caching_control(caching_ctl);
+ atomic_dec(&block_group->space_info->caching_threads);
return 0;
}
static int cache_block_group(struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_caching_control *caching_ctl;
struct task_struct *tsk;
int ret = 0;
+ smp_mb();
+ if (cache->cached != BTRFS_CACHE_NO)
+ return 0;
+
+ caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
+ BUG_ON(!caching_ctl);
+
+ INIT_LIST_HEAD(&caching_ctl->list);
+ mutex_init(&caching_ctl->mutex);
+ init_waitqueue_head(&caching_ctl->wait);
+ caching_ctl->block_group = cache;
+ caching_ctl->progress = cache->key.objectid;
+ /* one for caching kthread, one for caching block group list */
+ atomic_set(&caching_ctl->count, 2);
+
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
- return ret;
+ kfree(caching_ctl);
+ return 0;
}
+ cache->caching_ctl = caching_ctl;
cache->cached = BTRFS_CACHE_STARTED;
spin_unlock(&cache->lock);
+ down_write(&fs_info->extent_commit_sem);
+ list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
+ up_write(&fs_info->extent_commit_sem);
+
+ atomic_inc(&cache->space_info->caching_threads);
+
tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
cache->key.objectid);
if (IS_ERR(tsk)) {
@@ -1657,7 +1716,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
parent, ref_root, flags,
ref->objectid, ref->offset,
&ins, node->ref_mod);
- update_reserved_extents(root, ins.objectid, ins.offset, 0);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
node->num_bytes, parent,
@@ -1783,7 +1841,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
extent_op->flags_to_set,
&extent_op->key,
ref->level, &ins);
- update_reserved_extents(root, ins.objectid, ins.offset, 0);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
node->num_bytes, parent, ref_root,
@@ -1818,16 +1875,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
BUG_ON(extent_op);
head = btrfs_delayed_node_to_head(node);
if (insert_reserved) {
+ int mark_free = 0;
+ struct extent_buffer *must_clean = NULL;
+
+ ret = pin_down_bytes(trans, root, NULL,
+ node->bytenr, node->num_bytes,
+ head->is_data, 1, &must_clean);
+ if (ret > 0)
+ mark_free = 1;
+
+ if (must_clean) {
+ clean_tree_block(NULL, root, must_clean);
+ btrfs_tree_unlock(must_clean);
+ free_extent_buffer(must_clean);
+ }
if (head->is_data) {
ret = btrfs_del_csums(trans, root,
node->bytenr,
node->num_bytes);
BUG_ON(ret);
}
- btrfs_update_pinned_extents(root, node->bytenr,
- node->num_bytes, 1);
- update_reserved_extents(root, node->bytenr,
- node->num_bytes, 0);
+ if (mark_free) {
+ ret = btrfs_free_reserved_extent(root,
+ node->bytenr,
+ node->num_bytes);
+ BUG_ON(ret);
+ }
}
mutex_unlock(&head->mutex);
return 0;
@@ -2706,6 +2779,8 @@ int btrfs_check_metadata_free_space(struct btrfs_root *root)
/* get the space info for where the metadata will live */
alloc_target = btrfs_get_alloc_profile(root, 0);
meta_sinfo = __find_space_info(info, alloc_target);
+ if (!meta_sinfo)
+ goto alloc;
again:
spin_lock(&meta_sinfo->lock);
@@ -2717,12 +2792,13 @@ again:
do_div(thresh, 100);
if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
- meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
+ meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+ meta_sinfo->bytes_super > thresh) {
struct btrfs_trans_handle *trans;
if (!meta_sinfo->full) {
meta_sinfo->force_alloc = 1;
spin_unlock(&meta_sinfo->lock);
-
+alloc:
trans = btrfs_start_transaction(root, 1);
if (!trans)
return -ENOMEM;
@@ -2730,6 +2806,10 @@ again:
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2 * 1024 * 1024, alloc_target, 0);
btrfs_end_transaction(trans, root);
+ if (!meta_sinfo) {
+ meta_sinfo = __find_space_info(info,
+ alloc_target);
+ }
goto again;
}
spin_unlock(&meta_sinfo->lock);
@@ -2765,13 +2845,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
data_sinfo = BTRFS_I(inode)->space_info;
+ if (!data_sinfo)
+ goto alloc;
+
again:
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
if (data_sinfo->total_bytes - data_sinfo->bytes_used -
data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
- data_sinfo->bytes_may_use < bytes) {
+ data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
struct btrfs_trans_handle *trans;
/*
@@ -2783,7 +2866,7 @@ again:
data_sinfo->force_alloc = 1;
spin_unlock(&data_sinfo->lock);
-
+alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
trans = btrfs_start_transaction(root, 1);
if (!trans)
@@ -2795,6 +2878,11 @@ again:
btrfs_end_transaction(trans, root);
if (ret)
return ret;
+
+ if (!data_sinfo) {
+ btrfs_set_inode_space_info(root, inode);
+ data_sinfo = BTRFS_I(inode)->space_info;
+ }
goto again;
}
spin_unlock(&data_sinfo->lock);
@@ -3009,10 +3097,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
num_bytes = min(total, cache->key.offset - byte_in_group);
if (alloc) {
old_val += num_bytes;
+ btrfs_set_block_group_used(&cache->item, old_val);
+ cache->reserved -= num_bytes;
cache->space_info->bytes_used += num_bytes;
+ cache->space_info->bytes_reserved -= num_bytes;
if (cache->ro)
cache->space_info->bytes_readonly -= num_bytes;
- btrfs_set_block_group_used(&cache->item, old_val);
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
} else {
@@ -3057,127 +3147,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
return bytenr;
}
-int btrfs_update_pinned_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int pin)
+/*
+ * this function must be called within transaction
+ */
+int btrfs_pin_extent(struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, int reserved)
{
- u64 len;
- struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_group_cache *cache;
- if (pin)
- set_extent_dirty(&fs_info->pinned_extents,
- bytenr, bytenr + num - 1, GFP_NOFS);
-
- while (num > 0) {
- cache = btrfs_lookup_block_group(fs_info, bytenr);
- BUG_ON(!cache);
- len = min(num, cache->key.offset -
- (bytenr - cache->key.objectid));
- if (pin) {
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- cache->pinned += len;
- cache->space_info->bytes_pinned += len;
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
- fs_info->total_pinned += len;
- } else {
- int unpin = 0;
+ cache = btrfs_lookup_block_group(fs_info, bytenr);
+ BUG_ON(!cache);
- /*
- * in order to not race with the block group caching, we
- * only want to unpin the extent if we are cached. If
- * we aren't cached, we want to start async caching this
- * block group so we can free the extent the next time
- * around.
- */
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- unpin = (cache->cached == BTRFS_CACHE_FINISHED);
- if (likely(unpin)) {
- cache->pinned -= len;
- cache->space_info->bytes_pinned -= len;
- fs_info->total_pinned -= len;
- }
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ cache->pinned += num_bytes;
+ cache->space_info->bytes_pinned += num_bytes;
+ if (reserved) {
+ cache->reserved -= num_bytes;
+ cache->space_info->bytes_reserved -= num_bytes;
+ }
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
- if (likely(unpin))
- clear_extent_dirty(&fs_info->pinned_extents,
- bytenr, bytenr + len -1,
- GFP_NOFS);
- else
- cache_block_group(cache);
+ btrfs_put_block_group(cache);
- if (unpin)
- btrfs_add_free_space(cache, bytenr, len);
- }
- btrfs_put_block_group(cache);
- bytenr += len;
- num -= len;
+ set_extent_dirty(fs_info->pinned_extents,
+ bytenr, bytenr + num_bytes - 1, GFP_NOFS);
+ return 0;
+}
+
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve)
+{
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ if (reserve) {
+ cache->reserved += num_bytes;
+ cache->space_info->bytes_reserved += num_bytes;
+ } else {
+ cache->reserved -= num_bytes;
+ cache->space_info->bytes_reserved -= num_bytes;
}
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
return 0;
}
-static int update_reserved_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int reserve)
+int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
{
- u64 len;
- struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_caching_control *next;
+ struct btrfs_caching_control *caching_ctl;
+ struct btrfs_block_group_cache *cache;
- while (num > 0) {
- cache = btrfs_lookup_block_group(fs_info, bytenr);
- BUG_ON(!cache);
- len = min(num, cache->key.offset -
- (bytenr - cache->key.objectid));
+ down_write(&fs_info->extent_commit_sem);
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- if (reserve) {
- cache->reserved += len;
- cache->space_info->bytes_reserved += len;
+ list_for_each_entry_safe(caching_ctl, next,
+ &fs_info->caching_block_groups, list) {
+ cache = caching_ctl->block_group;
+ if (block_group_cache_done(cache)) {
+ cache->last_byte_to_unpin = (u64)-1;
+ list_del_init(&caching_ctl->list);
+ put_caching_control(caching_ctl);
} else {
- cache->reserved -= len;
- cache->space_info->bytes_reserved -= len;
+ cache->last_byte_to_unpin = caching_ctl->progress;
}
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
- btrfs_put_block_group(cache);
- bytenr += len;
- num -= len;
}
+
+ if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+ fs_info->pinned_extents = &fs_info->freed_extents[1];
+ else
+ fs_info->pinned_extents = &fs_info->freed_extents[0];
+
+ up_write(&fs_info->extent_commit_sem);
return 0;
}
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
- u64 last = 0;
- u64 start;
- u64 end;
- struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
- int ret;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_group_cache *cache = NULL;
+ u64 len;
- while (1) {
- ret = find_first_extent_bit(pinned_extents, last,
- &start, &end, EXTENT_DIRTY);
- if (ret)
- break;
+ while (start <= end) {
+ if (!cache ||
+ start >= cache->key.objectid + cache->key.offset) {
+ if (cache)
+ btrfs_put_block_group(cache);
+ cache = btrfs_lookup_block_group(fs_info, start);
+ BUG_ON(!cache);
+ }
+
+ len = cache->key.objectid + cache->key.offset - start;
+ len = min(len, end + 1 - start);
- set_extent_dirty(copy, start, end, GFP_NOFS);
- last = end + 1;
+ if (start < cache->last_byte_to_unpin) {
+ len = min(len, cache->last_byte_to_unpin - start);
+ btrfs_add_free_space(cache, start, len);
+ }
+
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ cache->pinned -= len;
+ cache->space_info->bytes_pinned -= len;
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
+
+ start += len;
}
+
+ if (cache)
+ btrfs_put_block_group(cache);
return 0;
}
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_io_tree *unpin)
+ struct btrfs_root *root)
{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_io_tree *unpin;
u64 start;
u64 end;
int ret;
+ if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+ unpin = &fs_info->freed_extents[1];
+ else
+ unpin = &fs_info->freed_extents[0];
+
while (1) {
ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY);
@@ -3186,10 +3285,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
ret = btrfs_discard_extent(root, start, end + 1 - start);
- /* unlocks the pinned mutex */
- btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
-
+ unpin_extent_range(root, start, end);
cond_resched();
}
@@ -3199,7 +3296,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
static int pin_down_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- u64 bytenr, u64 num_bytes, int is_data,
+ u64 bytenr, u64 num_bytes,
+ int is_data, int reserved,
struct extent_buffer **must_clean)
{
int err = 0;
@@ -3231,15 +3329,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
}
free_extent_buffer(buf);
pinit:
- btrfs_set_path_blocking(path);
+ if (path)
+ btrfs_set_path_blocking(path);
/* unlocks the pinned mutex */
- btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
+ btrfs_pin_extent(root, bytenr, num_bytes, reserved);
BUG_ON(err < 0);
return 0;
}
-
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -3413,7 +3511,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
ret = pin_down_bytes(trans, root, path, bytenr,
- num_bytes, is_data, &must_clean);
+ num_bytes, is_data, 0, &must_clean);
if (ret > 0)
mark_free = 1;
BUG_ON(ret < 0);
@@ -3544,8 +3642,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
/* unlocks the pinned mutex */
- btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
- update_reserved_extents(root, bytenr, num_bytes, 0);
+ btrfs_pin_extent(root, bytenr, num_bytes, 1);
ret = 0;
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
@@ -3585,19 +3682,33 @@ static noinline int
wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
u64 num_bytes)
{
+ struct btrfs_caching_control *caching_ctl;
DEFINE_WAIT(wait);
- prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
-
- if (block_group_cache_done(cache)) {
- finish_wait(&cache->caching_q, &wait);
+ caching_ctl = get_caching_control(cache);
+ if (!caching_ctl)
return 0;
- }
- schedule();
- finish_wait(&cache->caching_q, &wait);
- wait_event(cache->caching_q, block_group_cache_done(cache) ||
+ wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
(cache->free_space >= num_bytes));
+
+ put_caching_control(caching_ctl);
+ return 0;
+}
+
+static noinline int
+wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
+{
+ struct btrfs_caching_control *caching_ctl;
+ DEFINE_WAIT(wait);
+
+ caching_ctl = get_caching_control(cache);
+ if (!caching_ctl)
+ return 0;
+
+ wait_event(caching_ctl->wait, block_group_cache_done(cache));
+
+ put_caching_control(caching_ctl);
return 0;
}
@@ -3635,6 +3746,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
int last_ptr_loop = 0;
int loop = 0;
bool found_uncached_bg = false;
+ bool failed_cluster_refill = false;
WARN_ON(num_bytes < root->sectorsize);
btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3732,7 +3844,16 @@ have_block_group:
if (unlikely(block_group->ro))
goto loop;
- if (last_ptr) {
+ /*
+ * We want to try the cluster allocator, so look there first,
+ * unless we are on LOOP_NO_EMPTY_SIZE. By that point we will
+ * have tried the cluster allocator plenty of times without
+ * finding anything, so free space is likely far too fragmented
+ * for clustering to find anything. In that case just skip it
+ * and let the allocator find whatever block it can
+ * find.
+ */
+ if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
/*
* the refill lock keeps out other
* people trying to start a new cluster
@@ -3807,9 +3928,11 @@ refill_cluster:
spin_unlock(&last_ptr->refill_lock);
goto checks;
}
- } else if (!cached && loop > LOOP_CACHING_NOWAIT) {
+ } else if (!cached && loop > LOOP_CACHING_NOWAIT
+ && !failed_cluster_refill) {
spin_unlock(&last_ptr->refill_lock);
+ failed_cluster_refill = true;
wait_block_group_cache_progress(block_group,
num_bytes + empty_cluster + empty_size);
goto have_block_group;
@@ -3821,13 +3944,9 @@ refill_cluster:
* cluster. Free the cluster we've been trying
* to use, and go to the next block group
*/
- if (loop < LOOP_NO_EMPTY_SIZE) {
- btrfs_return_cluster_to_free_space(NULL,
- last_ptr);
- spin_unlock(&last_ptr->refill_lock);
- goto loop;
- }
+ btrfs_return_cluster_to_free_space(NULL, last_ptr);
spin_unlock(&last_ptr->refill_lock);
+ goto loop;
}
offset = btrfs_find_space_for_alloc(block_group, search_start,
@@ -3881,9 +4000,12 @@ checks:
search_start - offset);
BUG_ON(offset > search_start);
+ update_reserved_extents(block_group, num_bytes, 1);
+
/* we are all good, lets return */
break;
loop:
+ failed_cluster_refill = false;
btrfs_put_block_group(block_group);
}
up_read(&space_info->groups_sem);
@@ -3973,12 +4095,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
up_read(&info->groups_sem);
}
-static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 empty_size, u64 hint_byte,
- u64 search_end, struct btrfs_key *ins,
- u64 data)
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins,
+ u64 data)
{
int ret;
u64 search_start = 0;
@@ -4044,25 +4166,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
ret = btrfs_discard_extent(root, start, len);
btrfs_add_free_space(cache, start, len);
+ update_reserved_extents(cache, len, 0);
btrfs_put_block_group(cache);
- update_reserved_extents(root, start, len, 0);
-
- return ret;
-}
-
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 empty_size, u64 hint_byte,
- u64 search_end, struct btrfs_key *ins,
- u64 data)
-{
- int ret;
- ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
- empty_size, hint_byte, search_end, ins,
- data);
- if (!ret)
- update_reserved_extents(root, ins->objectid, ins->offset, 1);
return ret;
}
@@ -4223,15 +4328,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
{
int ret;
struct btrfs_block_group_cache *block_group;
+ struct btrfs_caching_control *caching_ctl;
+ u64 start = ins->objectid;
+ u64 num_bytes = ins->offset;
block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
cache_block_group(block_group);
- wait_event(block_group->caching_q,
- block_group_cache_done(block_group));
+ caching_ctl = get_caching_control(block_group);
- ret = btrfs_remove_free_space(block_group, ins->objectid,
- ins->offset);
- BUG_ON(ret);
+ if (!caching_ctl) {
+ BUG_ON(!block_group_cache_done(block_group));
+ ret = btrfs_remove_free_space(block_group, start, num_bytes);
+ BUG_ON(ret);
+ } else {
+ mutex_lock(&caching_ctl->mutex);
+
+ if (start >= caching_ctl->progress) {
+ ret = add_excluded_extent(root, start, num_bytes);
+ BUG_ON(ret);
+ } else if (start + num_bytes <= caching_ctl->progress) {
+ ret = btrfs_remove_free_space(block_group,
+ start, num_bytes);
+ BUG_ON(ret);
+ } else {
+ num_bytes = caching_ctl->progress - start;
+ ret = btrfs_remove_free_space(block_group,
+ start, num_bytes);
+ BUG_ON(ret);
+
+ start = caching_ctl->progress;
+ num_bytes = ins->objectid + ins->offset -
+ caching_ctl->progress;
+ ret = add_excluded_extent(root, start, num_bytes);
+ BUG_ON(ret);
+ }
+
+ mutex_unlock(&caching_ctl->mutex);
+ put_caching_control(caching_ctl);
+ }
+
+ update_reserved_extents(block_group, ins->offset, 1);
btrfs_put_block_group(block_group);
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
@@ -4255,9 +4391,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
int ret;
u64 flags = 0;
- ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
- empty_size, hint_byte, search_end,
- ins, 0);
+ ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
+ empty_size, hint_byte, search_end,
+ ins, 0);
if (ret)
return ret;
@@ -4268,7 +4404,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
} else
BUG_ON(parent > 0);
- update_reserved_extents(root, ins->objectid, ins->offset, 1);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_delayed_extent_op *extent_op;
extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
@@ -4347,452 +4482,99 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
return buf;
}
-#if 0
-int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *leaf)
-{
- u64 disk_bytenr;
- u64 num_bytes;
- struct btrfs_key key;
- struct btrfs_file_extent_item *fi;
- u32 nritems;
- int i;
- int ret;
-
- BUG_ON(!btrfs_is_leaf(leaf));
- nritems = btrfs_header_nritems(leaf);
-
- for (i = 0; i < nritems; i++) {
- cond_resched();
- btrfs_item_key_to_cpu(leaf, &key, i);
-
- /* only extents have references, skip everything else */
- if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
- continue;
-
- fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
-
- /* inline extents live in the btree, they don't have refs */
- if (btrfs_file_extent_type(leaf, fi) ==
- BTRFS_FILE_EXTENT_INLINE)
- continue;
-
- disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
-
- /* holes don't have refs */
- if (disk_bytenr == 0)
- continue;
-
- num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
- ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
- leaf->start, 0, key.objectid, 0);
- BUG_ON(ret);
- }
- return 0;
-}
-
-static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_leaf_ref *ref)
-{
- int i;
- int ret;
- struct btrfs_extent_info *info;
- struct refsort *sorted;
-
- if (ref->nritems == 0)
- return 0;
-
- sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS);
- for (i = 0; i < ref->nritems; i++) {
- sorted[i].bytenr = ref->extents[i].bytenr;
- sorted[i].slot = i;
- }
- sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL);
-
- /*
- * the items in the ref were sorted when the ref was inserted
- * into the ref cache, so this is already in order
- */
- for (i = 0; i < ref->nritems; i++) {
- info = ref->extents + sorted[i].slot;
- ret = btrfs_free_extent(trans, root, info->bytenr,
- info->num_bytes, ref->bytenr,
- ref->owner, ref->generation,
- info->objectid, 0);
-
- atomic_inc(&root->fs_info->throttle_gen);
- wake_up(&root->fs_info->transaction_throttle);
- cond_resched();
-
- BUG_ON(ret);
- info++;
- }
-
- kfree(sorted);
- return 0;
-}
-
-
-static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 start,
- u64 len, u32 *refs)
-{
- int ret;
-
- ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
- BUG_ON(ret);
-
-#if 0 /* some debugging code in case we see problems here */
- /* if the refs count is one, it won't get increased again. But
- * if the ref count is > 1, someone may be decreasing it at
- * the same time we are.
- */
- if (*refs != 1) {
- struct extent_buffer *eb = NULL;
- eb = btrfs_find_create_tree_block(root, start, len);
- if (eb)
- btrfs_tree_lock(eb);
-
- mutex_lock(&root->fs_info->alloc_mutex);
- ret = lookup_extent_ref(NULL, root, start, len, refs);
- BUG_ON(ret);
- mutex_unlock(&root->fs_info->alloc_mutex);
-
- if (eb) {
- btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
- }
- if (*refs == 1) {
- printk(KERN_ERR "btrfs block %llu went down to one "
- "during drop_snap\n", (unsigned long long)start);
- }
-
- }
-#endif
-
- cond_resched();
- return ret;
-}
+struct walk_control {
+ u64 refs[BTRFS_MAX_LEVEL];
+ u64 flags[BTRFS_MAX_LEVEL];
+ struct btrfs_key update_progress;
+ int stage;
+ int level;
+ int shared_level;
+ int update_ref;
+ int keep_locks;
+ int reada_slot;
+ int reada_count;
+};
+#define DROP_REFERENCE 1
+#define UPDATE_BACKREF 2
-/*
- * this is used while deleting old snapshots, and it drops the refs
- * on a whole subtree starting from a level 1 node.
- *
- * The idea is to sort all the leaf pointers, and then drop the
- * ref on all the leaves in order. Most of the time the leaves
- * will have ref cache entries, so no leaf IOs will be required to
- * find the extents they have references on.
- *
- * For each leaf, any references it has are also dropped in order
- *
- * This ends up dropping the references in something close to optimal
- * order for reading and modifying the extent allocation tree.
- */
-static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path)
+static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct walk_control *wc,
+ struct btrfs_path *path)
{
u64 bytenr;
- u64 root_owner;
- u64 root_gen;
- struct extent_buffer *eb = path->nodes[1];
- struct extent_buffer *leaf;
- struct btrfs_leaf_ref *ref;
- struct refsort *sorted = NULL;
- int nritems = btrfs_header_nritems(eb);
+ u64 generation;
+ u64 refs;
+ u64 last = 0;
+ u32 nritems;
+ u32 blocksize;
+ struct btrfs_key key;
+ struct extent_buffer *eb;
int ret;
- int i;
- int refi = 0;
- int slot = path->slots[1];
- u32 blocksize = btrfs_level_size(root, 0);
- u32 refs;
-
- if (nritems == 0)
- goto out;
-
- root_owner = btrfs_header_owner(eb);
- root_gen = btrfs_header_generation(eb);
- sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
+ int slot;
+ int nread = 0;
- /*
- * step one, sort all the leaf pointers so we don't scribble
- * randomly into the extent allocation tree
- */
- for (i = slot; i < nritems; i++) {
- sorted[refi].bytenr = btrfs_node_blockptr(eb, i);
- sorted[refi].slot = i;
- refi++;
+ if (path->slots[wc->level] < wc->reada_slot) {
+ wc->reada_count = wc->reada_count * 2 / 3;
+ wc->reada_count = max(wc->reada_count, 2);
+ } else {
+ wc->reada_count = wc->reada_count * 3 / 2;
+ wc->reada_count = min_t(int, wc->reada_count,
+ BTRFS_NODEPTRS_PER_BLOCK(root));
}
- /*
- * nritems won't be zero, but if we're picking up drop_snapshot
- * after a crash, slot might be > 0, so double check things
- * just in case.
- */
- if (refi == 0)
- goto out;
+ eb = path->nodes[wc->level];
+ nritems = btrfs_header_nritems(eb);
+ blocksize = btrfs_level_size(root, wc->level - 1);
- sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
+ for (slot = path->slots[wc->level]; slot < nritems; slot++) {
+ if (nread >= wc->reada_count)
+ break;
- /*
- * the first loop frees everything the leaves point to
- */
- for (i = 0; i < refi; i++) {
- u64 ptr_gen;
+ cond_resched();
+ bytenr = btrfs_node_blockptr(eb, slot);
+ generation = btrfs_node_ptr_generation(eb, slot);
- bytenr = sorted[i].bytenr;
+ if (slot == path->slots[wc->level])
+ goto reada;
- /*
- * check the reference count on this leaf. If it is > 1
- * we just decrement it below and don't update any
- * of the refs the leaf points to.
- */
- ret = drop_snap_lookup_refcount(trans, root, bytenr,
- blocksize, &refs);
- BUG_ON(ret);
- if (refs != 1)
+ if (wc->stage == UPDATE_BACKREF &&
+ generation <= root->root_key.offset)
continue;
- ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot);
-
- /*
- * the leaf only had one reference, which means the
- * only thing pointing to this leaf is the snapshot
- * we're deleting. It isn't possible for the reference
- * count to increase again later
- *
- * The reference cache is checked for the leaf,
- * and if found we'll be able to drop any refs held by
- * the leaf without needing to read it in.
- */
- ref = btrfs_lookup_leaf_ref(root, bytenr);
- if (ref && ref->generation != ptr_gen) {
- btrfs_free_leaf_ref(root, ref);
- ref = NULL;
- }
- if (ref) {
- ret = cache_drop_leaf_ref(trans, root, ref);
- BUG_ON(ret);
- btrfs_remove_leaf_ref(root, ref);
- btrfs_free_leaf_ref(root, ref);
- } else {
- /*
- * the leaf wasn't in the reference cache, so
- * we have to read it.
- */
- leaf = read_tree_block(root, bytenr, blocksize,
- ptr_gen);
- ret = btrfs_drop_leaf_ref(trans, root, leaf);
+ if (wc->stage == DROP_REFERENCE) {
+ ret = btrfs_lookup_extent_info(trans, root,
+ bytenr, blocksize,
+ &refs, NULL);
BUG_ON(ret);
- free_extent_buffer(leaf);
- }
- atomic_inc(&root->fs_info->throttle_gen);
- wake_up(&root->fs_info->transaction_throttle);
- cond_resched();
- }
-
- /*
- * run through the loop again to free the refs on the leaves.
- * This is faster than doing it in the loop above because
- * the leaves are likely to be clustered together. We end up
- * working in nice chunks on the extent allocation tree.
- */
- for (i = 0; i < refi; i++) {
- bytenr = sorted[i].bytenr;
- ret = btrfs_free_extent(trans, root, bytenr,
- blocksize, eb->start,
- root_owner, root_gen, 0, 1);
- BUG_ON(ret);
-
- atomic_inc(&root->fs_info->throttle_gen);
- wake_up(&root->fs_info->transaction_throttle);
- cond_resched();
- }
-out:
- kfree(sorted);
-
- /*
- * update the path to show we've processed the entire level 1
- * node. This will get saved into the root's drop_snapshot_progress
- * field so these drops are not repeated again if this transaction
- * commits.
- */
- path->slots[1] = nritems;
- return 0;
-}
-
-/*
- * helper function for drop_snapshot, this walks down the tree dropping ref
- * counts as it goes.
- */
-static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int *level)
-{
- u64 root_owner;
- u64 root_gen;
- u64 bytenr;
- u64 ptr_gen;
- struct extent_buffer *next;
- struct extent_buffer *cur;
- struct extent_buffer *parent;
- u32 blocksize;
- int ret;
- u32 refs;
-
- WARN_ON(*level < 0);
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
- ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
- path->nodes[*level]->len, &refs);
- BUG_ON(ret);
- if (refs > 1)
- goto out;
-
- /*
- * walk down to the last node level and free all the leaves
- */
- while (*level >= 0) {
- WARN_ON(*level < 0);
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
- cur = path->nodes[*level];
-
- if (btrfs_header_level(cur) != *level)
- WARN_ON(1);
+ BUG_ON(refs == 0);
+ if (refs == 1)
+ goto reada;
- if (path->slots[*level] >=
- btrfs_header_nritems(cur))
- break;
-
- /* the new code goes down to level 1 and does all the
- * leaves pointed to that node in bulk. So, this check
- * for level 0 will always be false.
- *
- * But, the disk format allows the drop_snapshot_progress
- * field in the root to leave things in a state where
- * a leaf will need cleaning up here. If someone crashes
- * with the old code and then boots with the new code,
- * we might find a leaf here.
- */
- if (*level == 0) {
- ret = btrfs_drop_leaf_ref(trans, root, cur);
- BUG_ON(ret);
- break;
+ if (!wc->update_ref ||
+ generation <= root->root_key.offset)
+ continue;
+ btrfs_node_key_to_cpu(eb, &key, slot);
+ ret = btrfs_comp_cpu_keys(&key,
+ &wc->update_progress);
+ if (ret < 0)
+ continue;
}
-
- /*
- * once we get to level one, process the whole node
- * at once, including everything below it.
- */
- if (*level == 1) {
- ret = drop_level_one_refs(trans, root, path);
- BUG_ON(ret);
+reada:
+ ret = readahead_tree_block(root, bytenr, blocksize,
+ generation);
+ if (ret)
break;
- }
-
- bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
- ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
- blocksize = btrfs_level_size(root, *level - 1);
-
- ret = drop_snap_lookup_refcount(trans, root, bytenr,
- blocksize, &refs);
- BUG_ON(ret);
-
- /*
- * if there is more than one reference, we don't need
- * to read that node to drop any references it has. We
- * just drop the ref we hold on that node and move on to the
- * next slot in this level.
- */
- if (refs != 1) {
- parent = path->nodes[*level];
- root_owner = btrfs_header_owner(parent);
- root_gen = btrfs_header_generation(parent);
- path->slots[*level]++;
-
- ret = btrfs_free_extent(trans, root, bytenr,
- blocksize, parent->start,
- root_owner, root_gen,
- *level - 1, 1);
- BUG_ON(ret);
-
- atomic_inc(&root->fs_info->throttle_gen);
- wake_up(&root->fs_info->transaction_throttle);
- cond_resched();
-
- continue;
- }
-
- /*
- * we need to keep freeing things in the next level down.
- * read the block and loop around to process it
- */
- next = read_tree_block(root, bytenr, blocksize, ptr_gen);
- WARN_ON(*level <= 0);
- if (path->nodes[*level-1])
- free_extent_buffer(path->nodes[*level-1]);
- path->nodes[*level-1] = next;
- *level = btrfs_header_level(next);
- path->slots[*level] = 0;
- cond_resched();
+ last = bytenr + blocksize;
+ nread++;
}
-out:
- WARN_ON(*level < 0);
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
-
- if (path->nodes[*level] == root->node) {
- parent = path->nodes[*level];
- bytenr = path->nodes[*level]->start;
- } else {
- parent = path->nodes[*level + 1];
- bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
- }
-
- blocksize = btrfs_level_size(root, *level);
- root_owner = btrfs_header_owner(parent);
- root_gen = btrfs_header_generation(parent);
-
- /*
- * cleanup and free the reference on the last node
- * we processed
- */
- ret = btrfs_free_extent(trans, root, bytenr, blocksize,
- parent->start, root_owner, root_gen,
- *level, 1);
- free_extent_buffer(path->nodes[*level]);
- path->nodes[*level] = NULL;
-
- *level += 1;
- BUG_ON(ret);
-
- cond_resched();
- return 0;
+ wc->reada_slot = slot;
}
-#endif
-
-struct walk_control {
- u64 refs[BTRFS_MAX_LEVEL];
- u64 flags[BTRFS_MAX_LEVEL];
- struct btrfs_key update_progress;
- int stage;
- int level;
- int shared_level;
- int update_ref;
- int keep_locks;
-};
-
-#define DROP_REFERENCE 1
-#define UPDATE_BACKREF 2
/*
* hepler to process tree block while walking down the tree.
*
- * when wc->stage == DROP_REFERENCE, this function checks
- * reference count of the block. if the block is shared and
- * we need update back refs for the subtree rooted at the
- * block, this function changes wc->stage to UPDATE_BACKREF
- *
* when wc->stage == UPDATE_BACKREF, this function updates
* back refs for pointers in the block.
*
@@ -4805,7 +4587,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
{
int level = wc->level;
struct extent_buffer *eb = path->nodes[level];
- struct btrfs_key key;
u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
int ret;
@@ -4828,21 +4609,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
BUG_ON(wc->refs[level] == 0);
}
- if (wc->stage == DROP_REFERENCE &&
- wc->update_ref && wc->refs[level] > 1) {
- BUG_ON(eb == root->node);
- BUG_ON(path->slots[level] > 0);
- if (level == 0)
- btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
- else
- btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
- if (btrfs_header_owner(eb) == root->root_key.objectid &&
- btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
- wc->stage = UPDATE_BACKREF;
- wc->shared_level = level;
- }
- }
-
if (wc->stage == DROP_REFERENCE) {
if (wc->refs[level] > 1)
return 1;
@@ -4879,6 +4645,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
}
/*
+ * helper to process the tree block pointer at path->slots[wc->level].
+ *
+ * when wc->stage == DROP_REFERENCE, this function looks up the
+ * reference count of the block pointed to. if the block
+ * is shared and we need to update back refs for the subtree
+ * rooted at the block, this function changes wc->stage to
+ * UPDATE_BACKREF. if the block is shared and there is no
+ * need to update back refs, this function drops the reference
+ * to the block.
+ *
+ * NOTE: return value 1 means we should stop walking down.
+ */
+static noinline int do_walk_down(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct walk_control *wc)
+{
+ u64 bytenr;
+ u64 generation;
+ u64 parent;
+ u32 blocksize;
+ struct btrfs_key key;
+ struct extent_buffer *next;
+ int level = wc->level; /* level of the node that holds the pointer */
+ int reada = 0; /* set when the child was not found by btrfs_find_tree_block */
+ int ret = 0;
+
+ generation = btrfs_node_ptr_generation(path->nodes[level],
+ path->slots[level]);
+ /*
+ * if the lower level block was created before the snapshot
+ * was created, we know there is no need to update back refs
+ * for the subtree
+ */
+ if (wc->stage == UPDATE_BACKREF &&
+ generation <= root->root_key.offset)
+ return 1;
+
+ bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
+ blocksize = btrfs_level_size(root, level - 1); /* size for the child's level */
+
+ next = btrfs_find_tree_block(root, bytenr, blocksize);
+ if (!next) {
+ next = btrfs_find_create_tree_block(root, bytenr, blocksize); /* NOTE(review): result is locked below without a NULL check - confirm it cannot fail here */
+ reada = 1;
+ }
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+
+ if (wc->stage == DROP_REFERENCE) {
+ ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+ &wc->refs[level - 1],
+ &wc->flags[level - 1]);
+ BUG_ON(ret);
+ BUG_ON(wc->refs[level - 1] == 0);
+
+ if (wc->refs[level - 1] > 1) { /* more than one reference: the block is shared */
+ if (!wc->update_ref ||
+ generation <= root->root_key.offset)
+ goto skip; /* no back ref update wanted or needed: just drop our reference */
+
+ btrfs_node_key_to_cpu(path->nodes[level], &key,
+ path->slots[level]);
+ ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
+ if (ret < 0) /* pointer key compares below wc->update_progress */
+ goto skip;
+
+ wc->stage = UPDATE_BACKREF;
+ wc->shared_level = level - 1; /* remember the level of the shared child */
+ }
+ }
+
+ if (!btrfs_buffer_uptodate(next, generation)) { /* stale buffer: release it and re-read below */
+ btrfs_tree_unlock(next);
+ free_extent_buffer(next);
+ next = NULL;
+ }
+
+ if (!next) {
+ if (reada && level == 1) /* readahead is only issued from a level 1 node */
+ reada_walk_down(trans, root, wc, path);
+ next = read_tree_block(root, bytenr, blocksize, generation); /* NOTE(review): result is locked below without a NULL check - confirm it cannot fail here */
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ }
+
+ level--; /* descend: level now names the child's level */
+ BUG_ON(level != btrfs_header_level(next));
+ path->nodes[level] = next;
+ path->slots[level] = 0;
+ path->locks[level] = 1; /* next was locked above and stays locked in the path */
+ wc->level = level;
+ if (wc->level == 1) /* arriving at level 1 resets the readahead slot */
+ wc->reada_slot = 0;
+ return 0;
+skip:
+ wc->refs[level - 1] = 0; /* level is still the parent's level on this path */
+ wc->flags[level - 1] = 0;
+
+ if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+ parent = path->nodes[level]->start; /* full backref: parent is the start of the current node */
+ } else {
+ BUG_ON(root->root_key.objectid !=
+ btrfs_header_owner(path->nodes[level]));
+ parent = 0; /* otherwise the current node must be owned by this root */
+ }
+
+ ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+ root->root_key.objectid, level - 1, 0);
+ BUG_ON(ret);
+
+ btrfs_tree_unlock(next);
+ free_extent_buffer(next);
+ return 1; /* reference dropped: caller should not walk down into this block */
+}
+
+/*
* hepler to process tree block while walking up the tree.
*
* when wc->stage == DROP_REFERENCE, this function drops
@@ -4905,7 +4788,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (level < wc->shared_level)
goto out;
- BUG_ON(wc->refs[level] <= 1);
ret = find_next_key(path, level + 1, &wc->update_progress);
if (ret > 0)
wc->update_ref = 0;
@@ -4936,8 +4818,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
path->locks[level] = 0;
return 1;
}
- } else {
- BUG_ON(level != 0);
}
}
@@ -4990,17 +4870,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct walk_control *wc)
{
- struct extent_buffer *next;
- struct extent_buffer *cur;
- u64 bytenr;
- u64 ptr_gen;
- u32 blocksize;
int level = wc->level;
int ret;
while (level >= 0) {
- cur = path->nodes[level];
- BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
+ if (path->slots[level] >=
+ btrfs_header_nritems(path->nodes[level]))
+ break;
ret = walk_down_proc(trans, root, path, wc);
if (ret > 0)
@@ -5009,20 +4885,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
if (level == 0)
break;
- bytenr = btrfs_node_blockptr(cur, path->slots[level]);
- blocksize = btrfs_level_size(root, level - 1);
- ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
-
- next = read_tree_block(root, bytenr, blocksize, ptr_gen);
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
-
- level--;
- BUG_ON(level != btrfs_header_level(next));
- path->nodes[level] = next;
- path->slots[level] = 0;
- path->locks[level] = 1;
- wc->level = level;
+ ret = do_walk_down(trans, root, path, wc);
+ if (ret > 0) {
+ path->slots[level]++;
+ continue;
+ }
+ level = wc->level;
}
return 0;
}
@@ -5112,9 +4980,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
err = ret;
goto out;
}
- btrfs_node_key_to_cpu(path->nodes[level], &key,
- path->slots[level]);
- WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
+ WARN_ON(ret > 0);
/*
* unlock our path, this is safe because only this
@@ -5149,6 +5015,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
wc->stage = DROP_REFERENCE;
wc->update_ref = update_ref;
wc->keep_locks = 0;
+ wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
ret = walk_down_tree(trans, root, path, wc);
@@ -5201,9 +5068,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
ret = btrfs_del_root(trans, tree_root, &root->root_key);
BUG_ON(ret);
- free_extent_buffer(root->node);
- free_extent_buffer(root->commit_root);
- kfree(root);
+ if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+ ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
+ NULL, NULL);
+ BUG_ON(ret < 0);
+ if (ret > 0) {
+ ret = btrfs_del_orphan_item(trans, tree_root,
+ root->root_key.objectid);
+ BUG_ON(ret);
+ }
+ }
+
+ if (root->in_radix) {
+ btrfs_free_fs_root(tree_root->fs_info, root);
+ } else {
+ free_extent_buffer(root->node);
+ free_extent_buffer(root->commit_root);
+ kfree(root);
+ }
out:
btrfs_end_transaction(trans, tree_root);
kfree(wc);
@@ -5255,6 +5137,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
wc->stage = DROP_REFERENCE;
wc->update_ref = 0;
wc->keep_locks = 1;
+ wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
wret = walk_down_tree(trans, root, path, wc);
@@ -5397,9 +5280,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
while (1) {
int ret;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
@@ -6842,287 +6725,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
return 0;
}
-#if 0
-static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 objectid, u64 size)
-{
- struct btrfs_path *path;
- struct btrfs_inode_item *item;
- struct extent_buffer *leaf;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->leave_spinning = 1;
- ret = btrfs_insert_empty_inode(trans, root, path, objectid);
- if (ret)
- goto out;
-
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
- memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
- btrfs_set_inode_generation(leaf, item, 1);
- btrfs_set_inode_size(leaf, item, size);
- btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
- btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
- btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(root, path);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *group)
+/*
+ * checks to see if it's even possible to relocate this block group.
+ *
+ * @return - -1 if it's not a good idea to relocate this block group, 0 if it's
+ * ok to go ahead and try.
+ */
+int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
{
- struct inode *inode = NULL;
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root;
- struct btrfs_key root_key;
- u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
- int err = 0;
+ struct btrfs_block_group_cache *block_group;
+ struct btrfs_space_info *space_info;
+ struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+ struct btrfs_device *device;
+ int full = 0;
+ int ret = 0;
- root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = (u64)-1;
- root = btrfs_read_fs_root_no_name(fs_info, &root_key);
- if (IS_ERR(root))
- return ERR_CAST(root);
+ block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
- trans = btrfs_start_transaction(root, 1);
- BUG_ON(!trans);
+ /* odd, couldn't find the block group, leave it alone */
+ if (!block_group)
+ return -1;
- err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
- if (err)
+ /* no bytes used, we're good */
+ if (!btrfs_block_group_used(&block_group->item))
goto out;
- err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
- BUG_ON(err);
-
- err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
- group->key.offset, 0, group->key.offset,
- 0, 0, 0);
- BUG_ON(err);
-
- inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
- if (inode->i_state & I_NEW) {
- BTRFS_I(inode)->root = root;
- BTRFS_I(inode)->location.objectid = objectid;
- BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
- BTRFS_I(inode)->location.offset = 0;
- btrfs_read_locked_inode(inode);
- unlock_new_inode(inode);
- BUG_ON(is_bad_inode(inode));
- } else {
- BUG_ON(1);
- }
- BTRFS_I(inode)->index_cnt = group->key.objectid;
-
- err = btrfs_orphan_add(trans, inode);
-out:
- btrfs_end_transaction(trans, root);
- if (err) {
- if (inode)
- iput(inode);
- inode = ERR_PTR(err);
- }
- return inode;
-}
-
-int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
-{
-
- struct btrfs_ordered_sum *sums;
- struct btrfs_sector_sum *sector_sum;
- struct btrfs_ordered_extent *ordered;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct list_head list;
- size_t offset;
- int ret;
- u64 disk_bytenr;
-
- INIT_LIST_HEAD(&list);
-
- ordered = btrfs_lookup_ordered_extent(inode, file_pos);
- BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
-
- disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
- ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
- disk_bytenr + len - 1, &list);
-
- while (!list_empty(&list)) {
- sums = list_entry(list.next, struct btrfs_ordered_sum, list);
- list_del_init(&sums->list);
-
- sector_sum = sums->sums;
- sums->bytenr = ordered->start;
+ space_info = block_group->space_info;
+ spin_lock(&space_info->lock);
- offset = 0;
- while (offset < sums->len) {
- sector_sum->bytenr += ordered->start - disk_bytenr;
- sector_sum++;
- offset += root->sectorsize;
- }
+ full = space_info->full;
- btrfs_add_ordered_sum(inode, ordered, sums);
+ /*
+ * if this is the last block group we have in this space, we can't
+ * relocate it unless we're able to allocate a new chunk below.
+ *
+ * Otherwise, we need to make sure we have room in the space to handle
+ * all of the extents from this block group. If we can, we're good
+ */
+ if ((space_info->total_bytes != block_group->key.offset) &&
+ (space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ btrfs_block_group_used(&block_group->item) <
+ space_info->total_bytes)) {
+ spin_unlock(&space_info->lock);
+ goto out;
}
- btrfs_put_ordered_extent(ordered);
- return 0;
-}
-
-int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_path *path;
- struct btrfs_fs_info *info = root->fs_info;
- struct extent_buffer *leaf;
- struct inode *reloc_inode;
- struct btrfs_block_group_cache *block_group;
- struct btrfs_key key;
- u64 skipped;
- u64 cur_byte;
- u64 total_found;
- u32 nritems;
- int ret;
- int progress;
- int pass = 0;
-
- root = root->fs_info->extent_root;
-
- block_group = btrfs_lookup_block_group(info, group_start);
- BUG_ON(!block_group);
-
- printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
- (unsigned long long)block_group->key.objectid,
- (unsigned long long)block_group->flags);
-
- path = btrfs_alloc_path();
- BUG_ON(!path);
-
- reloc_inode = create_reloc_inode(info, block_group);
- BUG_ON(IS_ERR(reloc_inode));
-
- __alloc_chunk_for_shrink(root, block_group, 1);
- set_block_group_readonly(block_group);
-
- btrfs_start_delalloc_inodes(info->tree_root);
- btrfs_wait_ordered_extents(info->tree_root, 0);
-again:
- skipped = 0;
- total_found = 0;
- progress = 0;
- key.objectid = block_group->key.objectid;
- key.offset = 0;
- key.type = 0;
- cur_byte = key.objectid;
-
- trans = btrfs_start_transaction(info->tree_root, 1);
- btrfs_commit_transaction(trans, info->tree_root);
+ spin_unlock(&space_info->lock);
- mutex_lock(&root->fs_info->cleaner_mutex);
- btrfs_clean_old_snapshots(info->tree_root);
- btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
- mutex_unlock(&root->fs_info->cleaner_mutex);
+ /*
+ * ok we don't have enough space, but maybe we have free space on our
+ * devices to allocate new chunks for relocation, so loop through our
+ * alloc devices and guess if we have enough space. However, if we
+ * were marked as full, then we know there aren't enough chunks, and we
+ * can just return.
+ */
+ ret = -1;
+ if (full)
+ goto out;
- trans = btrfs_start_transaction(info->tree_root, 1);
- btrfs_commit_transaction(trans, info->tree_root);
+ mutex_lock(&root->fs_info->chunk_mutex);
+ list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
+ u64 min_free = btrfs_block_group_used(&block_group->item);
+ u64 dev_offset, max_avail;
- while (1) {
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
-next:
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- goto out;
- if (ret == 1) {
- ret = 0;
+ /*
+ * check to make sure we can actually find a chunk with enough
+ * space to fit our block group in.
+ */
+ if (device->total_bytes > device->bytes_used + min_free) {
+ ret = find_free_dev_extent(NULL, device, min_free,
+ &dev_offset, &max_avail);
+ if (!ret)
break;
- }
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
+ ret = -1;
}
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-
- if (key.objectid >= block_group->key.objectid +
- block_group->key.offset)
- break;
-
- if (progress && need_resched()) {
- btrfs_release_path(root, path);
- cond_resched();
- progress = 0;
- continue;
- }
- progress = 1;
-
- if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
- key.objectid + key.offset <= cur_byte) {
- path->slots[0]++;
- goto next;
- }
-
- total_found++;
- cur_byte = key.objectid + key.offset;
- btrfs_release_path(root, path);
-
- __alloc_chunk_for_shrink(root, block_group, 0);
- ret = relocate_one_extent(root, path, &key, block_group,
- reloc_inode, pass);
- BUG_ON(ret < 0);
- if (ret > 0)
- skipped++;
-
- key.objectid = cur_byte;
- key.type = 0;
- key.offset = 0;
- }
-
- btrfs_release_path(root, path);
-
- if (pass == 0) {
- btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
- invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
- }
-
- if (total_found > 0) {
- printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
- (unsigned long long)total_found, pass);
- pass++;
- if (total_found == skipped && pass > 2) {
- iput(reloc_inode);
- reloc_inode = create_reloc_inode(info, block_group);
- pass = 0;
- }
- goto again;
}
-
- /* delete reloc_inode */
- iput(reloc_inode);
-
- /* unpin extents in this range */
- trans = btrfs_start_transaction(info->tree_root, 1);
- btrfs_commit_transaction(trans, info->tree_root);
-
- spin_lock(&block_group->lock);
- WARN_ON(block_group->pinned > 0);
- WARN_ON(block_group->reserved > 0);
- WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
- spin_unlock(&block_group->lock);
- btrfs_put_block_group(block_group);
- ret = 0;
+ mutex_unlock(&root->fs_info->chunk_mutex);
out:
- btrfs_free_path(path);
+ btrfs_put_block_group(block_group);
return ret;
}
-#endif
static int find_first_block_group(struct btrfs_root *root,
struct btrfs_path *path, struct btrfs_key *key)
@@ -7165,8 +6847,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_space_info *space_info;
+ struct btrfs_caching_control *caching_ctl;
struct rb_node *n;
+ down_write(&info->extent_commit_sem);
+ while (!list_empty(&info->caching_block_groups)) {
+ caching_ctl = list_entry(info->caching_block_groups.next,
+ struct btrfs_caching_control, list);
+ list_del(&caching_ctl->list);
+ put_caching_control(caching_ctl);
+ }
+ up_write(&info->extent_commit_sem);
+
spin_lock(&info->block_group_cache_lock);
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
block_group = rb_entry(n, struct btrfs_block_group_cache,
@@ -7180,8 +6872,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
up_write(&block_group->space_info->groups_sem);
if (block_group->cached == BTRFS_CACHE_STARTED)
- wait_event(block_group->caching_q,
- block_group_cache_done(block_group));
+ wait_block_group_cache_done(block_group);
btrfs_remove_free_space_cache(block_group);
@@ -7251,7 +6942,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
spin_lock_init(&cache->lock);
spin_lock_init(&cache->tree_lock);
cache->fs_info = info;
- init_waitqueue_head(&cache->caching_q);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
@@ -7273,8 +6963,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
cache->flags = btrfs_block_group_flags(&cache->item);
cache->sectorsize = root->sectorsize;
- remove_sb_from_cache(root, cache);
-
/*
* check for two cases, either we are full, and therefore
* don't need to bother with the caching work since we won't
@@ -7283,13 +6971,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* time, particularly in the full case.
*/
if (found_key.offset == btrfs_block_group_used(&cache->item)) {
+ exclude_super_stripes(root, cache);
+ cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
+ free_excluded_extents(root, cache);
} else if (btrfs_block_group_used(&cache->item) == 0) {
+ exclude_super_stripes(root, cache);
+ cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
add_new_free_space(cache, root->fs_info,
found_key.objectid,
found_key.objectid +
found_key.offset);
+ free_excluded_extents(root, cache);
}
ret = update_space_info(info, cache->flags, found_key.offset,
@@ -7297,6 +6991,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
&space_info);
BUG_ON(ret);
cache->space_info = space_info;
+ spin_lock(&cache->space_info->lock);
+ cache->space_info->bytes_super += cache->bytes_super;
+ spin_unlock(&cache->space_info->lock);
+
down_write(&space_info->groups_sem);
list_add_tail(&cache->list, &space_info->block_groups);
up_write(&space_info->groups_sem);
@@ -7346,7 +7044,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
spin_lock_init(&cache->tree_lock);
- init_waitqueue_head(&cache->caching_q);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
@@ -7355,15 +7052,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->flags = type;
btrfs_set_block_group_flags(&cache->item, type);
+ cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- remove_sb_from_cache(root, cache);
+ exclude_super_stripes(root, cache);
add_new_free_space(cache, root->fs_info, chunk_offset,
chunk_offset + size);
+ free_excluded_extents(root, cache);
+
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
BUG_ON(ret);
+
+ spin_lock(&cache->space_info->lock);
+ cache->space_info->bytes_super += cache->bytes_super;
+ spin_unlock(&cache->space_info->lock);
+
down_write(&cache->space_info->groups_sem);
list_add_tail(&cache->list, &cache->space_info->block_groups);
up_write(&cache->space_info->groups_sem);
@@ -7429,8 +7134,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
up_write(&block_group->space_info->groups_sem);
if (block_group->cached == BTRFS_CACHE_STARTED)
- wait_event(block_group->caching_q,
- block_group_cache_done(block_group));
+ wait_block_group_cache_done(block_group);
btrfs_remove_free_space_cache(block_group);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 68260180f58..0cb88f8146e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -367,10 +367,10 @@ static int insert_state(struct extent_io_tree *tree,
}
if (bits & EXTENT_DIRTY)
tree->dirty_bytes += end - start + 1;
- set_state_cb(tree, state, bits);
- state->state |= bits;
state->start = start;
state->end = end;
+ set_state_cb(tree, state, bits);
+ state->state |= bits;
node = tree_insert(&tree->state, end, &state->rb_node);
if (node) {
struct extent_state *found;
@@ -471,10 +471,14 @@ static int clear_state_bit(struct extent_io_tree *tree,
* bits were already set, or zero if none of the bits were already set.
*/
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int wake, int delete, gfp_t mask)
+ int bits, int wake, int delete,
+ struct extent_state **cached_state,
+ gfp_t mask)
{
struct extent_state *state;
+ struct extent_state *cached;
struct extent_state *prealloc = NULL;
+ struct rb_node *next_node;
struct rb_node *node;
u64 last_end;
int err;
@@ -488,6 +492,17 @@ again:
}
spin_lock(&tree->lock);
+ if (cached_state) {
+ cached = *cached_state;
+ *cached_state = NULL;
+ cached_state = NULL;
+ if (cached && cached->tree && cached->start == start) {
+ atomic_dec(&cached->refs);
+ state = cached;
+ goto hit_next;
+ }
+ free_extent_state(cached);
+ }
/*
* this search will find the extents that end after
* our range starts
@@ -496,6 +511,7 @@ again:
if (!node)
goto out;
state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
if (state->start > end)
goto out;
WARN_ON(state->end < start);
@@ -531,8 +547,6 @@ again:
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
- } else {
- start = state->start;
}
goto search_again;
}
@@ -550,16 +564,28 @@ again:
if (wake)
wake_up(&state->wq);
+
set |= clear_state_bit(tree, prealloc, bits,
wake, delete);
prealloc = NULL;
goto out;
}
+ if (state->end < end && prealloc && !need_resched())
+ next_node = rb_next(&state->rb_node);
+ else
+ next_node = NULL;
+
set |= clear_state_bit(tree, state, bits, wake, delete);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
+ if (start <= end && next_node) {
+ state = rb_entry(next_node, struct extent_state,
+ rb_node);
+ if (state->start == start)
+ goto hit_next;
+ }
goto search_again;
out:
@@ -653,28 +679,40 @@ static void set_state_bits(struct extent_io_tree *tree,
state->state |= bits;
}
+static void cache_state(struct extent_state *state, /* stores state in *cached_ptr when the checks below pass */
+ struct extent_state **cached_ptr) /* caller's cache slot; may be NULL */
+{
+ if (cached_ptr && !(*cached_ptr)) { /* only fill a slot that is present and still empty */
+ if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { /* only states carrying at least one of these bits are cached */
+ *cached_ptr = state;
+ atomic_inc(&state->refs); /* the cache slot takes its own reference on the state */
+ }
+ }
+}
+
/*
- * set some bits on a range in the tree. This may require allocations
- * or sleeping, so the gfp mask is used to indicate what is allowed.
+ * set some bits on a range in the tree. This may require allocations or
+ * sleeping, so the gfp mask is used to indicate what is allowed.
*
- * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
- * range already has the desired bits set. The start of the existing
- * range is returned in failed_start in this case.
+ * If any of the exclusive bits are set, this will fail with -EEXIST if some
+ * part of the range already has the desired bits set. The start of the
+ * existing range is returned in failed_start in this case.
*
- * [start, end] is inclusive
- * This takes the tree lock.
+ * [start, end] is inclusive. This takes the tree lock.
*/
+
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int exclusive, u64 *failed_start,
+ int bits, int exclusive_bits, u64 *failed_start,
+ struct extent_state **cached_state,
gfp_t mask)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
struct rb_node *node;
int err = 0;
- int set;
u64 last_start;
u64 last_end;
+
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
@@ -683,6 +721,13 @@ again:
}
spin_lock(&tree->lock);
+ if (cached_state && *cached_state) {
+ state = *cached_state;
+ if (state->start == start && state->tree) {
+ node = &state->rb_node;
+ goto hit_next;
+ }
+ }
/*
* this search will find all the extents that end after
* our range starts.
@@ -694,8 +739,8 @@ again:
BUG_ON(err == -EEXIST);
goto out;
}
-
state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
last_start = state->start;
last_end = state->end;
@@ -706,17 +751,29 @@ again:
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
- set = state->state & bits;
- if (set && exclusive) {
+ struct rb_node *next_node;
+ if (state->state & exclusive_bits) {
*failed_start = state->start;
err = -EEXIST;
goto out;
}
+
set_state_bits(tree, state, bits);
+ cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
+
start = last_end + 1;
+ if (start < end && prealloc && !need_resched()) {
+ next_node = rb_next(node);
+ if (next_node) {
+ state = rb_entry(next_node, struct extent_state,
+ rb_node);
+ if (state->start == start)
+ goto hit_next;
+ }
+ }
goto search_again;
}
@@ -737,8 +794,7 @@ again:
* desired bit on it.
*/
if (state->start < start) {
- set = state->state & bits;
- if (exclusive && set) {
+ if (state->state & exclusive_bits) {
*failed_start = start;
err = -EEXIST;
goto out;
@@ -750,12 +806,11 @@ again:
goto out;
if (state->end <= end) {
set_state_bits(tree, state, bits);
+ cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
- } else {
- start = state->start;
}
goto search_again;
}
@@ -774,6 +829,7 @@ again:
this_end = last_start - 1;
err = insert_state(tree, prealloc, start, this_end,
bits);
+ cache_state(prealloc, cached_state);
prealloc = NULL;
BUG_ON(err == -EEXIST);
if (err)
@@ -788,8 +844,7 @@ again:
* on the first half
*/
if (state->start <= end && state->end > end) {
- set = state->state & bits;
- if (exclusive && set) {
+ if (state->state & exclusive_bits) {
*failed_start = start;
err = -EEXIST;
goto out;
@@ -798,6 +853,7 @@ again:
BUG_ON(err == -EEXIST);
set_state_bits(tree, prealloc, bits);
+ cache_state(prealloc, cached_state);
merge_state(tree, prealloc);
prealloc = NULL;
goto out;
@@ -826,86 +882,64 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
- mask);
-}
-
-int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
+ NULL, mask);
}
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask)
{
return set_extent_bit(tree, start, end, bits, 0, NULL,
- mask);
+ NULL, mask);
}
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask)
{
- return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
+ return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
}
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_DIRTY,
- 0, NULL, mask);
+ EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
+ 0, NULL, NULL, mask);
}
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return clear_extent_bit(tree, start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
-}
-
-int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
+ EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
+ NULL, mask);
}
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
- mask);
+ NULL, mask);
}
static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
- return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
+ return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
+ NULL, mask);
}
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
- mask);
+ NULL, mask);
}
static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, gfp_t mask)
{
- return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
-}
-
-static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
- 0, NULL, mask);
-}
-
-static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
- u64 end, gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
+ return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
+ NULL, mask);
}
int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -917,13 +951,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
* either insert or lock state struct between start and end use mask to tell
* us if waiting is desired.
*/
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, struct extent_state **cached_state, gfp_t mask)
{
int err;
u64 failed_start;
while (1) {
- err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
- &failed_start, mask);
+ err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
+ EXTENT_LOCKED, &failed_start,
+ cached_state, mask);
if (err == -EEXIST && (mask & __GFP_WAIT)) {
wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
start = failed_start;
@@ -935,27 +971,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
return err;
}
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) /* thin wrapper around lock_extent_bits() */
+{
+ return lock_extent_bits(tree, start, end, 0, NULL, mask); /* no extra bits, no cached state */
+}
+
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
int err;
u64 failed_start;
- err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
- &failed_start, mask);
+ err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
+ &failed_start, NULL, mask);
if (err == -EEXIST) {
if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1,
- EXTENT_LOCKED, 1, 0, mask);
+ EXTENT_LOCKED, 1, 0, NULL, mask);
return 0;
}
return 1;
}
+/*
+ * Clear EXTENT_LOCKED on [start, end], handing the caller's cached
+ * extent_state pointer through to clear_extent_bit().
+ */
+int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
+			 struct extent_state **cached, gfp_t mask)
+{
+	int wake = 1;
+	int delete = 0;
+
+	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, wake, delete,
+				cached, mask);
+}
+
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
+ mask);
}
/*
@@ -974,7 +1023,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
page_cache_release(page);
index++;
}
- set_extent_dirty(tree, start, end, GFP_NOFS);
return 0;
}
@@ -994,7 +1042,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
page_cache_release(page);
index++;
}
- set_extent_writeback(tree, start, end, GFP_NOFS);
return 0;
}
@@ -1232,6 +1279,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode,
u64 delalloc_start;
u64 delalloc_end;
u64 found;
+ struct extent_state *cached_state = NULL;
int ret;
int loops = 0;
@@ -1269,6 +1317,7 @@ again:
/* some of the pages are gone, lets avoid looping by
* shortening the size of the delalloc range we're searching
*/
+ free_extent_state(cached_state);
if (!loops) {
unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
max_bytes = PAGE_CACHE_SIZE - offset;
@@ -1282,18 +1331,21 @@ again:
BUG_ON(ret);
/* step three, lock the state bits for the whole range */
- lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+ lock_extent_bits(tree, delalloc_start, delalloc_end,
+ 0, &cached_state, GFP_NOFS);
/* then test to make sure it is all still delalloc */
ret = test_range_bit(tree, delalloc_start, delalloc_end,
- EXTENT_DELALLOC, 1);
+ EXTENT_DELALLOC, 1, cached_state);
if (!ret) {
- unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+ unlock_extent_cached(tree, delalloc_start, delalloc_end,
+ &cached_state, GFP_NOFS);
__unlock_for_delalloc(inode, locked_page,
delalloc_start, delalloc_end);
cond_resched();
goto again;
}
+ free_extent_state(cached_state);
*start = delalloc_start;
*end = delalloc_end;
out_failed:
@@ -1307,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
int clear_unlock,
int clear_delalloc, int clear_dirty,
int set_writeback,
- int end_writeback)
+ int end_writeback,
+ int set_private2)
{
int ret;
struct page *pages[16];
@@ -1325,8 +1378,9 @@ int extent_clear_unlock_delalloc(struct inode *inode,
if (clear_delalloc)
clear_bits |= EXTENT_DELALLOC;
- clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
- if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
+ clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
+ if (!(unlock_pages || clear_dirty || set_writeback || end_writeback ||
+ set_private2))
return 0;
while (nr_pages > 0) {
@@ -1334,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode,
min_t(unsigned long,
nr_pages, ARRAY_SIZE(pages)), pages);
for (i = 0; i < ret; i++) {
+
+ if (set_private2)
+ SetPagePrivate2(pages[i]);
+
if (pages[i] == locked_page) {
page_cache_release(pages[i]);
continue;
@@ -1476,14 +1534,17 @@ out:
* range is found set.
*/
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int filled)
+ int bits, int filled, struct extent_state *cached)
{
struct extent_state *state = NULL;
struct rb_node *node;
int bitset = 0;
spin_lock(&tree->lock);
- node = tree_search(tree, start);
+ if (cached && cached->tree && cached->start == start)
+ node = &cached->rb_node;
+ else
+ node = tree_search(tree, start);
while (node && start <= end) {
state = rb_entry(node, struct extent_state, rb_node);
@@ -1503,6 +1564,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
bitset = 0;
break;
}
+
+ if (state->end == (u64)-1)
+ break;
+
start = state->end + 1;
if (start > end)
break;
@@ -1526,7 +1591,7 @@ static int check_page_uptodate(struct extent_io_tree *tree,
{
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 end = start + PAGE_CACHE_SIZE - 1;
- if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
+ if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
SetPageUptodate(page);
return 0;
}
@@ -1540,7 +1605,7 @@ static int check_page_locked(struct extent_io_tree *tree,
{
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 end = start + PAGE_CACHE_SIZE - 1;
- if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
+ if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
unlock_page(page);
return 0;
}
@@ -1552,10 +1617,7 @@ static int check_page_locked(struct extent_io_tree *tree,
static int check_page_writeback(struct extent_io_tree *tree,
struct page *page)
{
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 end = start + PAGE_CACHE_SIZE - 1;
- if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
- end_page_writeback(page);
+ end_page_writeback(page);
return 0;
}
@@ -1613,13 +1675,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
}
if (!uptodate) {
- clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
+ clear_extent_uptodate(tree, start, end, GFP_NOFS);
ClearPageUptodate(page);
SetPageError(page);
}
- clear_extent_writeback(tree, start, end, GFP_ATOMIC);
-
if (whole_page)
end_page_writeback(page);
else
@@ -1983,7 +2043,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
continue;
}
/* the get_extent function already copied into the page */
- if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
+ if (test_range_bit(tree, cur, cur_end,
+ EXTENT_UPTODATE, 1, NULL)) {
check_page_uptodate(tree, page);
unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
cur = cur + iosize;
@@ -2078,6 +2139,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 iosize;
u64 unlock_start;
sector_t sector;
+ struct extent_state *cached_state = NULL;
struct extent_map *em;
struct block_device *bdev;
int ret;
@@ -2124,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
delalloc_end = 0;
page_started = 0;
if (!epd->extent_locked) {
+ u64 delalloc_to_write = 0;
/*
* make sure the wbc mapping index is at least updated
* to this page.
@@ -2143,8 +2206,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
tree->ops->fill_delalloc(inode, page, delalloc_start,
delalloc_end, &page_started,
&nr_written);
+ /*
+ * delalloc_end is already one less than the total
+ * length, so we don't subtract one from
+ * PAGE_CACHE_SIZE
+ */
+ delalloc_to_write += (delalloc_end - delalloc_start +
+ PAGE_CACHE_SIZE) >>
+ PAGE_CACHE_SHIFT;
delalloc_start = delalloc_end + 1;
}
+ if (wbc->nr_to_write < delalloc_to_write) {
+ int thresh = 8192;
+
+ if (delalloc_to_write < thresh * 2)
+ thresh = delalloc_to_write;
+ wbc->nr_to_write = min_t(u64, delalloc_to_write,
+ thresh);
+ }
/* did the fill delalloc function already unlock and start
* the IO?
@@ -2160,15 +2239,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
goto done_unlocked;
}
}
- lock_extent(tree, start, page_end, GFP_NOFS);
-
- unlock_start = start;
-
if (tree->ops && tree->ops->writepage_start_hook) {
ret = tree->ops->writepage_start_hook(page, start,
page_end);
if (ret == -EAGAIN) {
- unlock_extent(tree, start, page_end, GFP_NOFS);
redirty_page_for_writepage(wbc, page);
update_nr_written(page, wbc, nr_written);
unlock_page(page);
@@ -2184,12 +2258,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
update_nr_written(page, wbc, nr_written + 1);
end = page_end;
- if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
- printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
-
if (last_byte <= start) {
- clear_extent_dirty(tree, start, page_end, GFP_NOFS);
- unlock_extent(tree, start, page_end, GFP_NOFS);
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, start,
page_end, NULL, 1);
@@ -2197,13 +2266,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
goto done;
}
- set_extent_uptodate(tree, start, page_end, GFP_NOFS);
blocksize = inode->i_sb->s_blocksize;
while (cur <= end) {
if (cur >= last_byte) {
- clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
- unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur,
page_end, NULL, 1);
@@ -2235,12 +2301,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
*/
if (compressed || block_start == EXTENT_MAP_HOLE ||
block_start == EXTENT_MAP_INLINE) {
- clear_extent_dirty(tree, cur,
- cur + iosize - 1, GFP_NOFS);
-
- unlock_extent(tree, unlock_start, cur + iosize - 1,
- GFP_NOFS);
-
/*
* end_io notification does not happen here for
* compressed extents
@@ -2265,13 +2325,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
}
/* leave this out until we have a page_mkwrite call */
if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
- EXTENT_DIRTY, 0)) {
+ EXTENT_DIRTY, 0, NULL)) {
cur = cur + iosize;
pg_offset += iosize;
continue;
}
- clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
if (tree->ops && tree->ops->writepage_io_hook) {
ret = tree->ops->writepage_io_hook(page, cur,
cur + iosize - 1);
@@ -2309,12 +2368,12 @@ done:
set_page_writeback(page);
end_page_writeback(page);
}
- if (unlock_start <= page_end)
- unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
unlock_page(page);
done_unlocked:
+ /* drop our reference on any cached states */
+ free_extent_state(cached_state);
return 0;
}
@@ -2339,9 +2398,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
writepage_t writepage, void *data,
void (*flush_fn)(void *))
{
- struct backing_dev_info *bdi = mapping->backing_dev_info;
int ret = 0;
int done = 0;
+ int nr_to_write_done = 0;
struct pagevec pvec;
int nr_pages;
pgoff_t index;
@@ -2361,7 +2420,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
scanned = 1;
}
retry:
- while (!done && (index <= end) &&
+ while (!done && !nr_to_write_done && (index <= end) &&
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY, min(end - index,
(pgoff_t)PAGEVEC_SIZE-1) + 1))) {
@@ -2412,12 +2471,15 @@ retry:
unlock_page(page);
ret = 0;
}
- if (ret || wbc->nr_to_write <= 0)
- done = 1;
- if (wbc->nonblocking && bdi_write_congested(bdi)) {
- wbc->encountered_congestion = 1;
+ if (ret)
done = 1;
- }
+
+ /*
+ * the filesystem may choose to bump up nr_to_write.
+ * We have to make sure to honor the new nr_to_write
+ * at any time
+ */
+ nr_to_write_done = wbc->nr_to_write <= 0;
}
pagevec_release(&pvec);
cond_resched();
@@ -2604,10 +2666,10 @@ int extent_invalidatepage(struct extent_io_tree *tree,
return 0;
lock_extent(tree, start, end, GFP_NOFS);
- wait_on_extent_writeback(tree, start, end);
+ wait_on_page_writeback(page);
clear_extent_bit(tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
- 1, 1, GFP_NOFS);
+ 1, 1, NULL, GFP_NOFS);
return 0;
}
@@ -2687,7 +2749,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
!isnew && !PageUptodate(page) &&
(block_off_end > to || block_off_start < from) &&
!test_range_bit(tree, block_start, cur_end,
- EXTENT_UPTODATE, 1)) {
+ EXTENT_UPTODATE, 1, NULL)) {
u64 sector;
u64 extent_offset = block_start - em->start;
size_t iosize;
@@ -2701,7 +2763,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
*/
set_extent_bit(tree, block_start,
block_start + iosize - 1,
- EXTENT_LOCKED, 0, NULL, GFP_NOFS);
+ EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
ret = submit_extent_page(READ, tree, page,
sector, iosize, page_offset, em->bdev,
NULL, 1,
@@ -2742,13 +2804,18 @@ int try_release_extent_state(struct extent_map_tree *map,
int ret = 1;
if (test_range_bit(tree, start, end,
- EXTENT_IOBITS | EXTENT_ORDERED, 0))
+ EXTENT_IOBITS, 0, NULL))
ret = 0;
else {
if ((mask & GFP_NOFS) == GFP_NOFS)
mask = GFP_NOFS;
- clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
- 1, 1, mask);
+ /*
+ * at this point we can safely clear everything except the
+ * locked bit and the nodatasum bit
+ */
+ clear_extent_bit(tree, start, end,
+ ~(EXTENT_LOCKED | EXTENT_NODATASUM),
+ 0, 0, NULL, mask);
}
return ret;
}
@@ -2771,29 +2838,28 @@ int try_release_extent_mapping(struct extent_map_tree *map,
u64 len;
while (start <= end) {
len = end - start + 1;
- spin_lock(&map->lock);
+ write_lock(&map->lock);
em = lookup_extent_mapping(map, start, len);
if (!em || IS_ERR(em)) {
- spin_unlock(&map->lock);
+ write_unlock(&map->lock);
break;
}
if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
em->start != start) {
- spin_unlock(&map->lock);
+ write_unlock(&map->lock);
free_extent_map(em);
break;
}
if (!test_range_bit(tree, em->start,
extent_map_end(em) - 1,
- EXTENT_LOCKED | EXTENT_WRITEBACK |
- EXTENT_ORDERED,
- 0)) {
+ EXTENT_LOCKED | EXTENT_WRITEBACK,
+ 0, NULL)) {
remove_extent_mapping(map, em);
/* once for the rb tree */
free_extent_map(em);
}
start = extent_map_end(em);
- spin_unlock(&map->lock);
+ write_unlock(&map->lock);
/* once for us */
free_extent_map(em);
@@ -3203,7 +3269,7 @@ int extent_range_uptodate(struct extent_io_tree *tree,
int uptodate;
unsigned long index;
- ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
+ ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
if (ret)
return 1;
while (start <= end) {
@@ -3233,7 +3299,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
return 1;
ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1);
+ EXTENT_UPTODATE, 1, NULL);
if (ret)
return ret;
@@ -3269,7 +3335,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
return 0;
if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1)) {
+ EXTENT_UPTODATE, 1, NULL)) {
return 0;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5bc20abf3f3..14ed16fd862 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -13,10 +13,8 @@
#define EXTENT_DEFRAG (1 << 6)
#define EXTENT_DEFRAG_DONE (1 << 7)
#define EXTENT_BUFFER_FILLED (1 << 8)
-#define EXTENT_ORDERED (1 << 9)
-#define EXTENT_ORDERED_METADATA (1 << 10)
-#define EXTENT_BOUNDARY (1 << 11)
-#define EXTENT_NODATASUM (1 << 12)
+#define EXTENT_BOUNDARY (1 << 9)
+#define EXTENT_NODATASUM (1 << 10)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
/* flags for bio submission */
@@ -142,6 +140,8 @@ int try_release_extent_state(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask);
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, struct extent_state **cached, gfp_t mask);
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
@@ -155,11 +155,12 @@ u64 count_range_bits(struct extent_io_tree *tree,
u64 max_bytes, unsigned long bits);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int filled);
+ int bits, int filled, struct extent_state *cached_state);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int wake, int delete, gfp_t mask);
+ int bits, int wake, int delete, struct extent_state **cached,
+ gfp_t mask);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
@@ -282,5 +283,6 @@ int extent_clear_unlock_delalloc(struct inode *inode,
int clear_unlock,
int clear_delalloc, int clear_dirty,
int set_writeback,
- int end_writeback);
+ int end_writeback,
+ int set_private2);
#endif
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 30c9365861e..2c726b7b9fa 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -36,7 +36,7 @@ void extent_map_exit(void)
void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
{
tree->map.rb_node = NULL;
- spin_lock_init(&tree->lock);
+ rwlock_init(&tree->lock);
}
/**
@@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
return 0;
}
+/*
+ * unpin_extent_cache - clear the pinned flag on a cached extent map
+ * @tree:	extent map tree to look in
+ * @start:	byte offset the mapping is expected to start at
+ * @len:	length handed to the lookup
+ *
+ * Takes the tree's write lock, looks up the mapping for @start/@len, clears
+ * EXTENT_FLAG_PINNED on it and then tries to merge it with its previous and
+ * next neighbours in the rb-tree.  Warns when no mapping is found or when
+ * the one found does not begin at @start.  Always returns 0.
+ */
+int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
+{
+	int ret = 0;
+	struct extent_map *merge = NULL;
+	struct rb_node *rb;
+	struct extent_map *em;
+
+	write_lock(&tree->lock);
+	em = lookup_extent_mapping(tree, start, len);
+
+	/* test for NULL first so a failed lookup warns instead of oopsing */
+	WARN_ON(!em || em->start != start);
+
+	if (!em)
+		goto out;
+
+	clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+	if (em->start != 0) {
+		rb = rb_prev(&em->rb_node);
+		if (rb)
+			merge = rb_entry(rb, struct extent_map, rb_node);
+		if (rb && mergable_maps(merge, em)) {
+			/* absorb the previous map; em takes over its start */
+			em->start = merge->start;
+			em->len += merge->len;
+			em->block_len += merge->block_len;
+			em->block_start = merge->block_start;
+			merge->in_tree = 0;
+			rb_erase(&merge->rb_node, &tree->map);
+			free_extent_map(merge);
+		}
+	}
+
+	rb = rb_next(&em->rb_node);
+	if (rb)
+		merge = rb_entry(rb, struct extent_map, rb_node);
+	if (rb && mergable_maps(em, merge)) {
+		/*
+		 * absorb the next map; grow block_len by the neighbour's
+		 * block_len, the same way the previous-map merge does
+		 */
+		em->len += merge->len;
+		em->block_len += merge->block_len;
+		rb_erase(&merge->rb_node, &tree->map);
+		merge->in_tree = 0;
+		free_extent_map(merge);
+	}
+
+	/* drop the reference taken by lookup_extent_mapping() */
+	free_extent_map(em);
+out:
+	write_unlock(&tree->lock);
+	return ret;
+}
+
/**
* add_extent_mapping - add new extent map to the extent tree
* @tree: tree to insert new map in
@@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
ret = -EEXIST;
goto out;
}
- assert_spin_locked(&tree->lock);
rb = tree_insert(&tree->map, em->start, &em->rb_node);
if (rb) {
ret = -EEXIST;
@@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
struct rb_node *next = NULL;
u64 end = range_end(start, len);
- assert_spin_locked(&tree->lock);
rb_node = __tree_search(&tree->map, start, &prev, &next);
if (!rb_node && prev) {
em = rb_entry(prev, struct extent_map, rb_node);
@@ -319,6 +367,54 @@ out:
}
/**
+ * search_extent_mapping - find a nearby extent map
+ * @tree: tree to lookup in
+ * @start: byte offset to start the search
+ * @len: length of the lookup range
+ *
+ * Find and return the first extent_map struct in @tree that intersects the
+ * [start, len] range.
+ *
+ * If one can't be found, any nearby extent may be returned
+ */
+struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
+					 u64 start, u64 len)
+{
+	struct extent_map *em;
+	struct rb_node *rb_node;
+	struct rb_node *prev = NULL;
+	struct rb_node *next = NULL;
+
+	/* note: @len is accepted for the interface but not consulted here */
+	rb_node = __tree_search(&tree->map, start, &prev, &next);
+	if (!rb_node) {
+		/* search returned nothing: fall back to prev, then next */
+		rb_node = prev ? prev : next;
+		if (!rb_node)
+			return NULL;
+	} else if (IS_ERR(rb_node)) {
+		/* propagate the search error as an extent_map error pointer */
+		return ERR_PTR(PTR_ERR(rb_node));
+	}
+
+	em = rb_entry(rb_node, struct extent_map, rb_node);
+	/* the returned map carries an extra reference for the caller */
+	atomic_inc(&em->refs);
+	return em;
+}
+
+/**
* remove_extent_mapping - removes an extent_map from the extent tree
* @tree: extent tree to remove from
* @em: extent map beeing removed
@@ -331,7 +427,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
int ret = 0;
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
- assert_spin_locked(&tree->lock);
rb_erase(&em->rb_node, &tree->map);
em->in_tree = 0;
return ret;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index fb6eeef06bb..ab6d74b6e64 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -31,7 +31,7 @@ struct extent_map {
struct extent_map_tree {
struct rb_root map;
- spinlock_t lock;
+ rwlock_t lock;
};
static inline u64 extent_map_end(struct extent_map *em)
@@ -59,4 +59,7 @@ struct extent_map *alloc_extent_map(gfp_t mask);
void free_extent_map(struct extent_map *em);
int __init extent_map_init(void);
void extent_map_exit(void);
+int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len);
+struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len);
#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4b833972273..571ad3c13b4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
int err = 0;
int i;
struct inode *inode = fdentry(file)->d_inode;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- u64 hint_byte;
u64 num_bytes;
u64 start_pos;
u64 end_of_last_block;
@@ -125,22 +123,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
end_of_last_block = start_pos + num_bytes - 1;
-
- lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
- trans = btrfs_join_transaction(root, 1);
- if (!trans) {
- err = -ENOMEM;
- goto out_unlock;
- }
- btrfs_set_trans_block_group(trans, inode);
- hint_byte = 0;
-
- set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
-
- /* check for reserved extents on each page, we don't want
- * to reset the delalloc bit on things that already have
- * extents reserved.
- */
btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
@@ -155,9 +137,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
* at this time.
*/
}
- err = btrfs_end_transaction(trans, root);
-out_unlock:
- unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
return err;
}
@@ -189,18 +168,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
if (!split2)
split2 = alloc_extent_map(GFP_NOFS);
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (!em) {
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
break;
}
flags = em->flags;
if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
- spin_unlock(&em_tree->lock);
if (em->start <= start &&
(!testend || em->start + em->len >= start + len)) {
free_extent_map(em);
+ write_unlock(&em_tree->lock);
break;
}
if (start < em->start) {
@@ -210,6 +189,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
start = em->start + em->len;
}
free_extent_map(em);
+ write_unlock(&em_tree->lock);
continue;
}
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
@@ -260,7 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
free_extent_map(split);
split = NULL;
}
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
/* once for us */
free_extent_map(em);
@@ -289,7 +269,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
u64 start, u64 end, u64 locked_end,
- u64 inline_limit, u64 *hint_byte)
+ u64 inline_limit, u64 *hint_byte, int drop_cache)
{
u64 extent_end = 0;
u64 search_start = start;
@@ -314,7 +294,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
int ret;
inline_limit = 0;
- btrfs_drop_extent_cache(inode, start, end - 1, 0);
+ if (drop_cache)
+ btrfs_drop_extent_cache(inode, start, end - 1, 0);
path = btrfs_alloc_path();
if (!path)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5edcee3a617..5c2caad7621 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -259,7 +259,9 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
- u64 max_bytes, possible_bytes;
+ u64 max_bytes;
+ u64 bitmap_bytes;
+ u64 extent_bytes;
/*
* The goal is to keep the total amount of memory used per 1gb of space
@@ -269,22 +271,27 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
max_bytes = MAX_CACHE_BYTES_PER_GIG *
(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
- possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
- (sizeof(struct btrfs_free_space) *
- block_group->extents_thresh);
+ /*
+ * we want to account for 1 more bitmap than what we have so we can make
+ * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
+ * we add more bitmaps.
+ */
+ bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;
- if (possible_bytes > max_bytes) {
- int extent_bytes = max_bytes -
- (block_group->total_bitmaps * PAGE_CACHE_SIZE);
+ if (bitmap_bytes >= max_bytes) {
+ block_group->extents_thresh = 0;
+ return;
+ }
- if (extent_bytes <= 0) {
- block_group->extents_thresh = 0;
- return;
- }
+ /*
+ * we want the extent entry threshold to always be at most 1/2 the max
+ * bytes we can have, or whatever is less than that.
+ */
+ extent_bytes = max_bytes - bitmap_bytes;
+ extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
- block_group->extents_thresh = extent_bytes /
- (sizeof(struct btrfs_free_space));
- }
+ block_group->extents_thresh =
+ div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
@@ -403,6 +410,7 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
BUG_ON(block_group->total_bitmaps >= max_bitmaps);
info->offset = offset_to_bitmap(block_group, offset);
+ info->bytes = 0;
link_free_space(block_group, info);
block_group->total_bitmaps++;
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 6b627c61180..72ce3c173d6 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -149,6 +149,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
ptr = (unsigned long)(ref + 1);
ret = 0;
} else if (ret < 0) {
+ if (ret == -EOVERFLOW)
+ ret = -EMLINK;
goto out;
} else {
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -177,8 +179,6 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, root, path, &key,
sizeof(struct btrfs_inode_item));
- if (ret == 0 && objectid > root->highest_inode)
- root->highest_inode = objectid;
return ret;
}
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 9abbced1123..c56eb590917 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -43,9 +43,10 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid)
slot = path->slots[0] - 1;
l = path->nodes[0];
btrfs_item_key_to_cpu(l, &found_key, slot);
- *objectid = found_key.objectid;
+ *objectid = max_t(u64, found_key.objectid,
+ BTRFS_FIRST_FREE_OBJECTID - 1);
} else {
- *objectid = BTRFS_FIRST_FREE_OBJECTID;
+ *objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
}
ret = 0;
error:
@@ -53,91 +54,27 @@ error:
return ret;
}
-/*
- * walks the btree of allocated inodes and find a hole.
- */
int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 dirid, u64 *objectid)
{
- struct btrfs_path *path;
- struct btrfs_key key;
int ret;
- int slot = 0;
- u64 last_ino = 0;
- int start_found;
- struct extent_buffer *l;
- struct btrfs_key search_key;
- u64 search_start = dirid;
-
mutex_lock(&root->objectid_mutex);
- if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID &&
- root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) {
- *objectid = ++root->last_inode_alloc;
- mutex_unlock(&root->objectid_mutex);
- return 0;
- }
- path = btrfs_alloc_path();
- BUG_ON(!path);
- search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID);
- search_key.objectid = search_start;
- search_key.type = 0;
- search_key.offset = 0;
-
- start_found = 0;
- ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0);
- if (ret < 0)
- goto error;
- while (1) {
- l = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(l)) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 0)
- continue;
- if (ret < 0)
- goto error;
- if (!start_found) {
- *objectid = search_start;
- start_found = 1;
- goto found;
- }
- *objectid = last_ino > search_start ?
- last_ino : search_start;
- goto found;
- }
- btrfs_item_key_to_cpu(l, &key, slot);
- if (key.objectid >= search_start) {
- if (start_found) {
- if (last_ino < search_start)
- last_ino = search_start;
- if (key.objectid > last_ino) {
- *objectid = last_ino;
- goto found;
- }
- } else if (key.objectid > search_start) {
- *objectid = search_start;
- goto found;
- }
- }
- if (key.objectid >= BTRFS_LAST_FREE_OBJECTID)
- break;
+ if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
+ ret = btrfs_find_highest_inode(root, &root->highest_objectid);
+ if (ret)
+ goto out;
+ }
- start_found = 1;
- last_ino = key.objectid + 1;
- path->slots[0]++;
+ if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
+ ret = -ENOSPC;
+ goto out;
}
- BUG_ON(1);
-found:
- btrfs_release_path(root, path);
- btrfs_free_path(path);
- BUG_ON(*objectid < search_start);
- mutex_unlock(&root->objectid_mutex);
- return 0;
-error:
- btrfs_release_path(root, path);
- btrfs_free_path(path);
+
+ *objectid = ++root->highest_objectid;
+ ret = 0;
+out:
mutex_unlock(&root->objectid_mutex);
return ret;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9096fd0ca3c..e9b76bcd1c1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
}
ret = btrfs_drop_extents(trans, root, inode, start,
- aligned_end, aligned_end, start, &hint_byte);
+ aligned_end, aligned_end, start,
+ &hint_byte, 1);
BUG_ON(ret);
if (isize > actual_end)
@@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
inline_len, compressed_size,
compressed_pages);
BUG_ON(ret);
- btrfs_drop_extent_cache(inode, start, aligned_end, 0);
+ btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
return 0;
}
@@ -425,7 +426,7 @@ again:
extent_clear_unlock_delalloc(inode,
&BTRFS_I(inode)->io_tree,
start, end, NULL, 1, 0,
- 0, 1, 1, 1);
+ 0, 1, 1, 1, 0);
ret = 0;
goto free_pages_out;
}
@@ -611,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode,
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
while (1) {
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
@@ -640,7 +641,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- NULL, 1, 1, 0, 1, 1, 0);
+ NULL, 1, 1, 0, 1, 1, 0, 0);
ret = btrfs_submit_compressed_write(inode,
async_extent->start,
@@ -713,7 +714,7 @@ static noinline int cow_file_range(struct inode *inode,
extent_clear_unlock_delalloc(inode,
&BTRFS_I(inode)->io_tree,
start, end, NULL, 1, 1,
- 1, 1, 1, 1);
+ 1, 1, 1, 1, 0);
*nr_written = *nr_written +
(end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
*page_started = 1;
@@ -725,6 +726,15 @@ static noinline int cow_file_range(struct inode *inode,
BUG_ON(disk_num_bytes >
btrfs_super_total_bytes(&root->fs_info->super_copy));
+
+ read_lock(&BTRFS_I(inode)->extent_tree.lock);
+ em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
+ start, num_bytes);
+ if (em) {
+ alloc_hint = em->block_start;
+ free_extent_map(em);
+ }
+ read_unlock(&BTRFS_I(inode)->extent_tree.lock);
btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
while (disk_num_bytes > 0) {
@@ -737,7 +747,6 @@ static noinline int cow_file_range(struct inode *inode,
em = alloc_extent_map(GFP_NOFS);
em->start = start;
em->orig_start = em->start;
-
ram_size = ins.offset;
em->len = ins.offset;
@@ -747,9 +756,9 @@ static noinline int cow_file_range(struct inode *inode,
set_bit(EXTENT_FLAG_PINNED, &em->flags);
while (1) {
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
@@ -776,11 +785,14 @@ static noinline int cow_file_range(struct inode *inode,
/* we're not doing compressed IO, don't unlock the first
* page (which the caller expects to stay locked), don't
* clear any dirty bits and don't set any writeback bits
+ *
+ * Do set the Private2 bit so we know this page was properly
+ * setup for writepage
*/
extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
start, start + ram_size - 1,
locked_page, unlock, 1,
- 1, 0, 0, 0);
+ 1, 0, 0, 0, 1);
disk_num_bytes -= cur_alloc_size;
num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
@@ -853,7 +865,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
int limit = 10 * 1024 * 1042;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
- EXTENT_DELALLOC, 1, 0, GFP_NOFS);
+ EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS);
while (start < end) {
async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
async_cow->inode = inode;
@@ -1080,9 +1092,9 @@ out_check:
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
while (1) {
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
@@ -1101,7 +1113,7 @@ out_check:
extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
cur_offset, cur_offset + num_bytes - 1,
- locked_page, 1, 1, 1, 0, 0, 0);
+ locked_page, 1, 1, 1, 0, 0, 0, 1);
cur_offset = extent_end;
if (cur_offset > end)
break;
@@ -1374,10 +1386,8 @@ again:
lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
/* already ordered? We're done */
- if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
- EXTENT_ORDERED, 0)) {
+ if (PagePrivate2(page))
goto out;
- }
ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered) {
@@ -1413,11 +1423,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
struct inode *inode = page->mapping->host;
struct btrfs_writepage_fixup *fixup;
struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret;
- ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
- EXTENT_ORDERED, 0);
- if (ret)
+ /* this page is properly in the ordered list */
+ if (TestClearPagePrivate2(page))
return 0;
if (PageChecked(page))
@@ -1455,9 +1463,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
BUG_ON(!path);
path->leave_spinning = 1;
+
+ /*
+ * we may be replacing one extent in the tree with another.
+ * The new extent is pinned in the extent map, and we don't want
+ * to drop it from the cache until it is completely in the btree.
+ *
+ * So, tell btrfs_drop_extents to leave this extent in the cache.
+ * the caller is expected to unpin it and allow it to be merged
+ * with the others.
+ */
ret = btrfs_drop_extents(trans, root, inode, file_pos,
file_pos + num_bytes, locked_end,
- file_pos, &hint);
+ file_pos, &hint, 0);
BUG_ON(ret);
ins.objectid = inode->i_ino;
@@ -1485,7 +1503,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
inode_add_bytes(inode, num_bytes);
- btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);
ins.objectid = disk_bytenr;
ins.offset = disk_num_bytes;
@@ -1596,6 +1613,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->len,
compressed, 0, 0,
BTRFS_FILE_EXTENT_REG);
+ unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
+ ordered_extent->file_offset,
+ ordered_extent->len);
BUG_ON(ret);
}
unlock_extent(io_tree, ordered_extent->file_offset,
@@ -1623,6 +1643,7 @@ nocow:
static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate)
{
+ ClearPagePrivate2(page);
return btrfs_finish_ordered_io(page->mapping->host, start, end);
}
@@ -1669,13 +1690,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
failrec->last_mirror = 0;
failrec->bio_flags = 0;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, failrec->len);
if (em->start > start || em->start + em->len < start) {
free_extent_map(em);
em = NULL;
}
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (!em || IS_ERR(em)) {
kfree(failrec);
@@ -1794,7 +1815,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
return 0;
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
- test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) {
+ test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
GFP_NOFS);
return 0;
@@ -2352,6 +2373,69 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
return ret;
}
+int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *dir, u64 objectid,
+ const char *name, int name_len)
+{
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_dir_item *di;
+ struct btrfs_key key;
+ u64 index;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+ name, name_len, -1);
+ BUG_ON(!di || IS_ERR(di));
+
+ leaf = path->nodes[0];
+ btrfs_dir_item_key_to_cpu(leaf, di, &key);
+ WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
+ ret = btrfs_delete_one_dir_name(trans, root, path, di);
+ BUG_ON(ret);
+ btrfs_release_path(root, path);
+
+ ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
+ objectid, root->root_key.objectid,
+ dir->i_ino, &index, name, name_len);
+ if (ret < 0) {
+ BUG_ON(ret != -ENOENT);
+ di = btrfs_search_dir_index_item(root, path, dir->i_ino,
+ name, name_len);
+ BUG_ON(!di || IS_ERR(di));
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ btrfs_release_path(root, path);
+ index = key.offset;
+ }
+
+ di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
+ index, name, name_len, -1);
+ BUG_ON(!di || IS_ERR(di));
+
+ leaf = path->nodes[0];
+ btrfs_dir_item_key_to_cpu(leaf, di, &key);
+ WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
+ ret = btrfs_delete_one_dir_name(trans, root, path, di);
+ BUG_ON(ret);
+ btrfs_release_path(root, path);
+
+ btrfs_i_size_write(dir, dir->i_size - name_len * 2);
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ ret = btrfs_update_inode(trans, root, dir);
+ BUG_ON(ret);
+ dir->i_sb->s_dirt = 1;
+
+ btrfs_free_path(path);
+ return 0;
+}
+
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
@@ -2361,29 +2445,31 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
struct btrfs_trans_handle *trans;
unsigned long nr = 0;
- /*
- * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir
- * the root of a subvolume or snapshot
- */
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
- inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
return -ENOTEMPTY;
- }
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, dir);
+ if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+ err = btrfs_unlink_subvol(trans, root, dir,
+ BTRFS_I(inode)->location.objectid,
+ dentry->d_name.name,
+ dentry->d_name.len);
+ goto out;
+ }
+
err = btrfs_orphan_add(trans, inode);
if (err)
- goto fail_trans;
+ goto out;
/* now the directory is empty */
err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
dentry->d_name.name, dentry->d_name.len);
if (!err)
btrfs_i_size_write(inode, 0);
-
-fail_trans:
+out:
nr = trans->blocks_used;
ret = btrfs_end_transaction_throttle(trans, root);
btrfs_btree_balance_dirty(root, nr);
@@ -2935,7 +3021,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
cur_offset,
cur_offset + hole_size,
block_end,
- cur_offset, &hint_byte);
+ cur_offset, &hint_byte, 1);
if (err)
break;
err = btrfs_insert_file_extent(trans, root,
@@ -3003,6 +3089,11 @@ void btrfs_delete_inode(struct inode *inode)
}
btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (inode->i_nlink > 0) {
+ BUG_ON(btrfs_root_refs(&root->root_item) != 0);
+ goto no_delete;
+ }
+
btrfs_i_size_write(inode, 0);
trans = btrfs_join_transaction(root, 1);
@@ -3070,29 +3161,67 @@ out_err:
* is kind of like crossing a mount point.
*/
static int fixup_tree_root_location(struct btrfs_root *root,
- struct btrfs_key *location,
- struct btrfs_root **sub_root,
- struct dentry *dentry)
+ struct inode *dir,
+ struct dentry *dentry,
+ struct btrfs_key *location,
+ struct btrfs_root **sub_root)
{
- struct btrfs_root_item *ri;
+ struct btrfs_path *path;
+ struct btrfs_root *new_root;
+ struct btrfs_root_ref *ref;
+ struct extent_buffer *leaf;
+ int ret;
+ int err = 0;
- if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
- return 0;
- if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
- return 0;
+ path = btrfs_alloc_path();
+ if (!path) {
+ err = -ENOMEM;
+ goto out;
+ }
- *sub_root = btrfs_read_fs_root(root->fs_info, location,
- dentry->d_name.name,
- dentry->d_name.len);
- if (IS_ERR(*sub_root))
- return PTR_ERR(*sub_root);
+ err = -ENOENT;
+ ret = btrfs_find_root_ref(root->fs_info->tree_root, path,
+ BTRFS_I(dir)->root->root_key.objectid,
+ location->objectid);
+ if (ret) {
+ if (ret < 0)
+ err = ret;
+ goto out;
+ }
- ri = &(*sub_root)->root_item;
- location->objectid = btrfs_root_dirid(ri);
- btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
- location->offset = 0;
+ leaf = path->nodes[0];
+ ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
+ if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino ||
+ btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
+ goto out;
- return 0;
+ ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
+ (unsigned long)(ref + 1),
+ dentry->d_name.len);
+ if (ret)
+ goto out;
+
+ btrfs_release_path(root->fs_info->tree_root, path);
+
+ new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
+ if (IS_ERR(new_root)) {
+ err = PTR_ERR(new_root);
+ goto out;
+ }
+
+ if (btrfs_root_refs(&new_root->root_item) == 0) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ *sub_root = new_root;
+ location->objectid = btrfs_root_dirid(&new_root->root_item);
+ location->type = BTRFS_INODE_ITEM_KEY;
+ location->offset = 0;
+ err = 0;
+out:
+ btrfs_free_path(path);
+ return err;
}
static void inode_tree_add(struct inode *inode)
@@ -3101,11 +3230,13 @@ static void inode_tree_add(struct inode *inode)
struct btrfs_inode *entry;
struct rb_node **p;
struct rb_node *parent;
-
again:
p = &root->inode_tree.rb_node;
parent = NULL;
+ if (hlist_unhashed(&inode->i_hash))
+ return;
+
spin_lock(&root->inode_lock);
while (*p) {
parent = *p;
@@ -3132,13 +3263,87 @@ again:
static void inode_tree_del(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
+ int empty = 0;
spin_lock(&root->inode_lock);
if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
+ empty = RB_EMPTY_ROOT(&root->inode_tree);
}
spin_unlock(&root->inode_lock);
+
+ if (empty && btrfs_root_refs(&root->root_item) == 0) {
+ synchronize_srcu(&root->fs_info->subvol_srcu);
+ spin_lock(&root->inode_lock);
+ empty = RB_EMPTY_ROOT(&root->inode_tree);
+ spin_unlock(&root->inode_lock);
+ if (empty)
+ btrfs_add_dead_root(root);
+ }
+}
+
+int btrfs_invalidate_inodes(struct btrfs_root *root)
+{
+ struct rb_node *node;
+ struct rb_node *prev;
+ struct btrfs_inode *entry;
+ struct inode *inode;
+ u64 objectid = 0;
+
+ WARN_ON(btrfs_root_refs(&root->root_item) != 0);
+
+ spin_lock(&root->inode_lock);
+again:
+ node = root->inode_tree.rb_node;
+ prev = NULL;
+ while (node) {
+ prev = node;
+ entry = rb_entry(node, struct btrfs_inode, rb_node);
+
+ if (objectid < entry->vfs_inode.i_ino)
+ node = node->rb_left;
+ else if (objectid > entry->vfs_inode.i_ino)
+ node = node->rb_right;
+ else
+ break;
+ }
+ if (!node) {
+ while (prev) {
+ entry = rb_entry(prev, struct btrfs_inode, rb_node);
+ if (objectid <= entry->vfs_inode.i_ino) {
+ node = prev;
+ break;
+ }
+ prev = rb_next(prev);
+ }
+ }
+ while (node) {
+ entry = rb_entry(node, struct btrfs_inode, rb_node);
+ objectid = entry->vfs_inode.i_ino + 1;
+ inode = igrab(&entry->vfs_inode);
+ if (inode) {
+ spin_unlock(&root->inode_lock);
+ if (atomic_read(&inode->i_count) > 1)
+ d_prune_aliases(inode);
+ /*
+ * btrfs_drop_inode will remove it from
+ * the inode cache when its usage count
+ * hits zero.
+ */
+ iput(inode);
+ cond_resched();
+ spin_lock(&root->inode_lock);
+ goto again;
+ }
+
+ if (cond_resched_lock(&root->inode_lock))
+ goto again;
+
+ node = rb_next(node);
+ }
+ spin_unlock(&root->inode_lock);
+ return 0;
}
static noinline void init_btrfs_i(struct inode *inode)
@@ -3225,15 +3430,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
return inode;
}
+static struct inode *new_simple_dir(struct super_block *s,
+ struct btrfs_key *key,
+ struct btrfs_root *root)
+{
+ struct inode *inode = new_inode(s);
+
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ init_btrfs_i(inode);
+
+ BTRFS_I(inode)->root = root;
+ memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
+ BTRFS_I(inode)->dummy_inode = 1;
+
+ inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+ return inode;
+}
+
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
struct inode *inode;
- struct btrfs_inode *bi = BTRFS_I(dir);
- struct btrfs_root *root = bi->root;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
struct btrfs_key location;
+ int index;
int ret;
+ dentry->d_op = &btrfs_dentry_operations;
+
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -3242,29 +3473,50 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
if (ret < 0)
return ERR_PTR(ret);
- inode = NULL;
- if (location.objectid) {
- ret = fixup_tree_root_location(root, &location, &sub_root,
- dentry);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
- return ERR_PTR(-ENOENT);
+ if (location.objectid == 0)
+ return NULL;
+
+ if (location.type == BTRFS_INODE_ITEM_KEY) {
+ inode = btrfs_iget(dir->i_sb, &location, root);
+ return inode;
+ }
+
+ BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
+
+ index = srcu_read_lock(&root->fs_info->subvol_srcu);
+ ret = fixup_tree_root_location(root, dir, dentry,
+ &location, &sub_root);
+ if (ret < 0) {
+ if (ret != -ENOENT)
+ inode = ERR_PTR(ret);
+ else
+ inode = new_simple_dir(dir->i_sb, &location, sub_root);
+ } else {
inode = btrfs_iget(dir->i_sb, &location, sub_root);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
}
+ srcu_read_unlock(&root->fs_info->subvol_srcu, index);
+
return inode;
}
+static int btrfs_dentry_delete(struct dentry *dentry)
+{
+ struct btrfs_root *root;
+
+ if (!dentry->d_inode)
+ return 0;
+
+ root = BTRFS_I(dentry->d_inode)->root;
+ if (btrfs_root_refs(&root->root_item) == 0)
+ return 1;
+ return 0;
+}
+
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
struct inode *inode;
- if (dentry->d_name.len > BTRFS_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
-
inode = btrfs_lookup_dentry(dir, dentry);
if (IS_ERR(inode))
return ERR_CAST(inode);
@@ -3603,9 +3855,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if (ret != 0)
goto fail;
- if (objectid > root->highest_inode)
- root->highest_inode = objectid;
-
inode->i_uid = current_fsuid();
if (dir && (dir->i_mode & S_ISGID)) {
@@ -3673,26 +3922,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
struct inode *parent_inode, struct inode *inode,
const char *name, int name_len, int add_backref, u64 index)
{
- int ret;
+ int ret = 0;
struct btrfs_key key;
struct btrfs_root *root = BTRFS_I(parent_inode)->root;
- key.objectid = inode->i_ino;
- btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
- key.offset = 0;
+ if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
+ } else {
+ key.objectid = inode->i_ino;
+ btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+ key.offset = 0;
+ }
+
+ if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
+ key.objectid, root->root_key.objectid,
+ parent_inode->i_ino,
+ index, name, name_len);
+ } else if (add_backref) {
+ ret = btrfs_insert_inode_ref(trans, root,
+ name, name_len, inode->i_ino,
+ parent_inode->i_ino, index);
+ }
- ret = btrfs_insert_dir_item(trans, root, name, name_len,
- parent_inode->i_ino,
- &key, btrfs_inode_type(inode),
- index);
if (ret == 0) {
- if (add_backref) {
- ret = btrfs_insert_inode_ref(trans, root,
- name, name_len,
- inode->i_ino,
- parent_inode->i_ino,
- index);
- }
+ ret = btrfs_insert_dir_item(trans, root, name, name_len,
+ parent_inode->i_ino, &key,
+ btrfs_inode_type(inode), index);
+ BUG_ON(ret);
+
btrfs_i_size_write(parent_inode, parent_inode->i_size +
name_len * 2);
parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
@@ -3875,18 +4133,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
err = btrfs_add_nondir(trans, dentry, inode, 1, index);
- if (err)
- drop_inode = 1;
-
- btrfs_update_inode_block_group(trans, dir);
- err = btrfs_update_inode(trans, root, inode);
-
- if (err)
+ if (err) {
drop_inode = 1;
+ } else {
+ btrfs_update_inode_block_group(trans, dir);
+ err = btrfs_update_inode(trans, root, inode);
+ BUG_ON(err);
+ btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
+ }
nr = trans->blocks_used;
-
- btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
btrfs_end_transaction_throttle(trans, root);
fail:
if (drop_inode) {
@@ -4064,11 +4320,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
int compressed;
again:
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em)
em->bdev = root->fs_info->fs_devices->latest_bdev;
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (em) {
if (em->start > start || em->start + em->len <= start)
@@ -4215,6 +4471,11 @@ again:
map = kmap(page);
read_extent_buffer(leaf, map + pg_offset, ptr,
copy_size);
+ if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
+ memset(map + pg_offset + copy_size, 0,
+ PAGE_CACHE_SIZE - pg_offset -
+ copy_size);
+ }
kunmap(page);
}
flush_dcache_page(page);
@@ -4259,7 +4520,7 @@ insert:
}
err = 0;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
/* it is possible that someone inserted the extent into the tree
* while we had the lock dropped. It is also possible that
@@ -4299,7 +4560,7 @@ insert:
err = 0;
}
}
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
out:
if (path)
btrfs_free_path(path);
@@ -4398,13 +4659,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
u64 page_start = page_offset(page);
u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+
+ /*
+ * we have the page locked, so new writeback can't start,
+ * and the dirty bit won't be cleared while we are here.
+ *
+ * Wait for IO on this page so that we can safely clear
+ * the PagePrivate2 bit and do ordered accounting
+ */
wait_on_page_writeback(page);
+
tree = &BTRFS_I(page->mapping->host)->io_tree;
if (offset) {
btrfs_releasepage(page, GFP_NOFS);
return;
}
-
lock_extent(tree, page_start, page_end, GFP_NOFS);
ordered = btrfs_lookup_ordered_extent(page->mapping->host,
page_offset(page));
@@ -4415,16 +4684,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
*/
clear_extent_bit(tree, page_start, page_end,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_LOCKED, 1, 0, GFP_NOFS);
- btrfs_finish_ordered_io(page->mapping->host,
- page_start, page_end);
+ EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
+ /*
+ * whoever cleared the private bit is responsible
+ * for the finish_ordered_io
+ */
+ if (TestClearPagePrivate2(page)) {
+ btrfs_finish_ordered_io(page->mapping->host,
+ page_start, page_end);
+ }
btrfs_put_ordered_extent(ordered);
lock_extent(tree, page_start, page_end, GFP_NOFS);
}
clear_extent_bit(tree, page_start, page_end,
- EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_ORDERED,
- 1, 1, GFP_NOFS);
+ EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
+ 1, 1, NULL, GFP_NOFS);
__btrfs_releasepage(page, GFP_NOFS);
ClearPageChecked(page);
@@ -4521,11 +4795,14 @@ again:
}
ClearPageChecked(page);
set_page_dirty(page);
+ SetPageUptodate(page);
BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
out_unlock:
+ if (!ret)
+ return VM_FAULT_LOCKED;
unlock_page(page);
out:
return ret;
@@ -4594,11 +4871,11 @@ out:
* create a new subvolume directory/inode (helper for the ioctl).
*/
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *new_root, struct dentry *dentry,
+ struct btrfs_root *new_root,
u64 new_dirid, u64 alloc_hint)
{
struct inode *inode;
- int error;
+ int err;
u64 index = 0;
inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
@@ -4611,11 +4888,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
inode->i_nlink = 1;
btrfs_i_size_write(inode, 0);
- error = btrfs_update_inode(trans, new_root, inode);
- if (error)
- return error;
+ err = btrfs_update_inode(trans, new_root, inode);
+ BUG_ON(err);
- d_instantiate(dentry, inode);
+ iput(inode);
return 0;
}
@@ -4693,6 +4969,16 @@ void btrfs_destroy_inode(struct inode *inode)
kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
+void btrfs_drop_inode(struct inode *inode)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
+ generic_delete_inode(inode);
+ else
+ generic_drop_inode(inode);
+}
+
static void init_once(void *foo)
{
struct btrfs_inode *ei = (struct btrfs_inode *) foo;
@@ -4761,31 +5047,32 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(old_dir)->root;
+ struct btrfs_root *dest = BTRFS_I(new_dir)->root;
struct inode *new_inode = new_dentry->d_inode;
struct inode *old_inode = old_dentry->d_inode;
struct timespec ctime = CURRENT_TIME;
u64 index = 0;
+ u64 root_objectid;
int ret;
- /* we're not allowed to rename between subvolumes */
- if (BTRFS_I(old_inode)->root->root_key.objectid !=
- BTRFS_I(new_dir)->root->root_key.objectid)
+ if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+ return -EPERM;
+
+ /* we only allow rename subvolume link between subvolumes */
+ if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
return -EXDEV;
- if (S_ISDIR(old_inode->i_mode) && new_inode &&
- new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
+ if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
+ (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID))
return -ENOTEMPTY;
- }
- /* to rename a snapshot or subvolume, we need to juggle the
- * backrefs. This isn't coded yet
- */
- if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
- return -EXDEV;
+ if (S_ISDIR(old_inode->i_mode) && new_inode &&
+ new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
+ return -ENOTEMPTY;
ret = btrfs_check_metadata_free_space(root);
if (ret)
- goto out_unlock;
+ return ret;
/*
* we're using rename to replace one file with another.
@@ -4796,8 +5083,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
filemap_flush(old_inode->i_mapping);
+ /* close the racy window with snapshot create/destroy ioctl */
+ if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
+ down_read(&root->fs_info->subvol_sem);
+
trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, new_dir);
+
+ if (dest != root)
+ btrfs_record_root_in_trans(trans, dest);
+ ret = btrfs_set_inode_index(new_dir, &index);
+ if (ret)
+ goto out_fail;
+
+ if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ /* force full log commit if subvolume involved. */
+ root->fs_info->last_trans_log_full_commit = trans->transid;
+ } else {
+ ret = btrfs_insert_inode_ref(trans, dest,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len,
+ old_inode->i_ino,
+ new_dir->i_ino, index);
+ if (ret)
+ goto out_fail;
+ /*
+ * this is an ugly little race, but the rename is required
+ * to make sure that if we crash, the inode is either at the
+ * old name or the new one. pinning the log transaction lets
+ * us make sure we don't allow a log commit to come in after
+ * we unlink the name but before we add the new name back in.
+ */
+ btrfs_pin_log_trans(root);
+ }
/*
* make sure the inode gets flushed if it is replacing
* something.
@@ -4807,18 +5126,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
btrfs_add_ordered_operation(trans, root, old_inode);
}
- /*
- * this is an ugly little race, but the rename is required to make
- * sure that if we crash, the inode is either at the old name
- * or the new one. pinning the log transaction lets us make sure
- * we don't allow a log commit to come in after we unlink the
- * name but before we add the new name back in.
- */
- btrfs_pin_log_trans(root);
-
- btrfs_set_trans_block_group(trans, new_dir);
-
- btrfs_inc_nlink(old_dentry->d_inode);
old_dir->i_ctime = old_dir->i_mtime = ctime;
new_dir->i_ctime = new_dir->i_mtime = ctime;
old_inode->i_ctime = ctime;
@@ -4826,47 +5133,58 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
- ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
- old_dentry->d_name.name,
- old_dentry->d_name.len);
- if (ret)
- goto out_fail;
+ if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
+ ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
+ old_dentry->d_name.name,
+ old_dentry->d_name.len);
+ } else {
+ btrfs_inc_nlink(old_dentry->d_inode);
+ ret = btrfs_unlink_inode(trans, root, old_dir,
+ old_dentry->d_inode,
+ old_dentry->d_name.name,
+ old_dentry->d_name.len);
+ }
+ BUG_ON(ret);
if (new_inode) {
new_inode->i_ctime = CURRENT_TIME;
- ret = btrfs_unlink_inode(trans, root, new_dir,
- new_dentry->d_inode,
- new_dentry->d_name.name,
- new_dentry->d_name.len);
- if (ret)
- goto out_fail;
+ if (unlikely(new_inode->i_ino ==
+ BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+ root_objectid = BTRFS_I(new_inode)->location.objectid;
+ ret = btrfs_unlink_subvol(trans, dest, new_dir,
+ root_objectid,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len);
+ BUG_ON(new_inode->i_nlink == 0);
+ } else {
+ ret = btrfs_unlink_inode(trans, dest, new_dir,
+ new_dentry->d_inode,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len);
+ }
+ BUG_ON(ret);
if (new_inode->i_nlink == 0) {
ret = btrfs_orphan_add(trans, new_dentry->d_inode);
- if (ret)
- goto out_fail;
+ BUG_ON(ret);
}
-
}
- ret = btrfs_set_inode_index(new_dir, &index);
- if (ret)
- goto out_fail;
- ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode,
- old_inode, new_dentry->d_name.name,
- new_dentry->d_name.len, 1, index);
- if (ret)
- goto out_fail;
+ ret = btrfs_add_link(trans, new_dir, old_inode,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len, 0, index);
+ BUG_ON(ret);
- btrfs_log_new_name(trans, old_inode, old_dir,
- new_dentry->d_parent);
+ if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ btrfs_log_new_name(trans, old_inode, old_dir,
+ new_dentry->d_parent);
+ btrfs_end_log_trans(root);
+ }
out_fail:
-
- /* this btrfs_end_log_trans just allows the current
- * log-sub transaction to complete
- */
- btrfs_end_log_trans(root);
btrfs_end_transaction_throttle(trans, root);
-out_unlock:
+
+ if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
+ up_read(&root->fs_info->subvol_sem);
return ret;
}
@@ -5058,6 +5376,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
0, 0, 0,
BTRFS_FILE_EXTENT_PREALLOC);
BUG_ON(ret);
+ btrfs_drop_extent_cache(inode, cur_offset,
+ cur_offset + ins.offset -1, 0);
num_bytes -= ins.offset;
cur_offset += ins.offset;
alloc_hint = ins.objectid + ins.offset;
@@ -5223,6 +5543,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
.lookup = btrfs_lookup,
.permission = btrfs_permission,
};
+
static struct file_operations btrfs_dir_file_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
@@ -5269,6 +5590,7 @@ static const struct address_space_operations btrfs_aops = {
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
.set_page_dirty = btrfs_set_page_dirty,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations btrfs_symlink_aops = {
@@ -5309,3 +5631,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
};
+
+struct dentry_operations btrfs_dentry_operations = {
+ .d_delete = btrfs_dentry_delete,
+};
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bd88f25889f..a8577a7f26a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -230,8 +230,8 @@ static noinline int create_subvol(struct btrfs_root *root,
struct btrfs_root_item root_item;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
- struct btrfs_root *new_root = root;
- struct inode *dir;
+ struct btrfs_root *new_root;
+ struct inode *dir = dentry->d_parent->d_inode;
int ret;
int err;
u64 objectid;
@@ -241,7 +241,7 @@ static noinline int create_subvol(struct btrfs_root *root,
ret = btrfs_check_metadata_free_space(root);
if (ret)
- goto fail_commit;
+ return ret;
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
@@ -304,11 +304,17 @@ static noinline int create_subvol(struct btrfs_root *root,
if (ret)
goto fail;
+ key.offset = (u64)-1;
+ new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
+ BUG_ON(IS_ERR(new_root));
+
+ btrfs_record_root_in_trans(trans, new_root);
+
+ ret = btrfs_create_subvol_root(trans, new_root, new_dirid,
+ BTRFS_I(dir)->block_group);
/*
* insert the directory item
*/
- key.offset = (u64)-1;
- dir = dentry->d_parent->d_inode;
ret = btrfs_set_inode_index(dir, &index);
BUG_ON(ret);
@@ -322,44 +328,18 @@ static noinline int create_subvol(struct btrfs_root *root,
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
- /* add the backref first */
ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
- objectid, BTRFS_ROOT_BACKREF_KEY,
- root->root_key.objectid,
+ objectid, root->root_key.objectid,
dir->i_ino, index, name, namelen);
BUG_ON(ret);
- /* now add the forward ref */
- ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
- root->root_key.objectid, BTRFS_ROOT_REF_KEY,
- objectid,
- dir->i_ino, index, name, namelen);
-
- BUG_ON(ret);
-
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- goto fail_commit;
-
- new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
- BUG_ON(!new_root);
-
- trans = btrfs_start_transaction(new_root, 1);
- BUG_ON(!trans);
-
- ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid,
- BTRFS_I(dir)->block_group);
- if (ret)
- goto fail;
-
+ d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
nr = trans->blocks_used;
- err = btrfs_commit_transaction(trans, new_root);
+ err = btrfs_commit_transaction(trans, root);
if (err && !ret)
ret = err;
-fail_commit:
- btrfs_btree_balance_dirty(root, nr);
return ret;
}
@@ -420,14 +400,15 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
* sys_mkdirat and vfs_mkdir, but we only do a single component lookup
* inside this filesystem so it's quite a bit simpler.
*/
-static noinline int btrfs_mksubvol(struct path *parent, char *name,
- int mode, int namelen,
+static noinline int btrfs_mksubvol(struct path *parent,
+ char *name, int namelen,
struct btrfs_root *snap_src)
{
+ struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
int error;
- mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
dentry = lookup_one_len(name, parent->dentry, namelen);
error = PTR_ERR(dentry);
@@ -438,99 +419,39 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name,
if (dentry->d_inode)
goto out_dput;
- if (!IS_POSIXACL(parent->dentry->d_inode))
- mode &= ~current_umask();
-
error = mnt_want_write(parent->mnt);
if (error)
goto out_dput;
- error = btrfs_may_create(parent->dentry->d_inode, dentry);
+ error = btrfs_may_create(dir, dentry);
if (error)
goto out_drop_write;
- /*
- * Actually perform the low-level subvolume creation after all
- * this VFS fuzz.
- *
- * Eventually we want to pass in an inode under which we create this
- * subvolume, but for now all are under the filesystem root.
- *
- * Also we should pass on the mode eventually to allow creating new
- * subvolume with specific mode bits.
- */
+ down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
+
+ if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
+ goto out_up_read;
+
if (snap_src) {
- struct dentry *dir = dentry->d_parent;
- struct dentry *test = dir->d_parent;
- struct btrfs_path *path = btrfs_alloc_path();
- int ret;
- u64 test_oid;
- u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid;
-
- test_oid = snap_src->root_key.objectid;
-
- ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
- path, parent_oid, test_oid);
- if (ret == 0)
- goto create;
- btrfs_release_path(snap_src->fs_info->tree_root, path);
-
- /* we need to make sure we aren't creating a directory loop
- * by taking a snapshot of something that has our current
- * subvol in its directory tree. So, this loops through
- * the dentries and checks the forward refs for each subvolume
- * to see if is references the subvolume where we are
- * placing this new snapshot.
- */
- while (1) {
- if (!test ||
- dir == snap_src->fs_info->sb->s_root ||
- test == snap_src->fs_info->sb->s_root ||
- test->d_inode->i_sb != snap_src->fs_info->sb) {
- break;
- }
- if (S_ISLNK(test->d_inode->i_mode)) {
- printk(KERN_INFO "Btrfs symlink in snapshot "
- "path, failed\n");
- error = -EMLINK;
- btrfs_free_path(path);
- goto out_drop_write;
- }
- test_oid =
- BTRFS_I(test->d_inode)->root->root_key.objectid;
- ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
- path, test_oid, parent_oid);
- if (ret == 0) {
- printk(KERN_INFO "Btrfs snapshot creation "
- "failed, looping\n");
- error = -EMLINK;
- btrfs_free_path(path);
- goto out_drop_write;
- }
- btrfs_release_path(snap_src->fs_info->tree_root, path);
- test = test->d_parent;
- }
-create:
- btrfs_free_path(path);
- error = create_snapshot(snap_src, dentry, name, namelen);
+ error = create_snapshot(snap_src, dentry,
+ name, namelen);
} else {
- error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root,
- dentry, name, namelen);
+ error = create_subvol(BTRFS_I(dir)->root, dentry,
+ name, namelen);
}
- if (error)
- goto out_drop_write;
-
- fsnotify_mkdir(parent->dentry->d_inode, dentry);
+ if (!error)
+ fsnotify_mkdir(dir, dentry);
+out_up_read:
+ up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
out_drop_write:
mnt_drop_write(parent->mnt);
out_dput:
dput(dentry);
out_unlock:
- mutex_unlock(&parent->dentry->d_inode->i_mutex);
+ mutex_unlock(&dir->i_mutex);
return error;
}
-
static int btrfs_defrag_file(struct file *file)
{
struct inode *inode = fdentry(file)->d_inode;
@@ -596,9 +517,8 @@ again:
clear_page_dirty_for_io(page);
btrfs_set_extent_delalloc(inode, page_start, page_end);
-
- unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
set_page_dirty(page);
+ unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
unlock_page(page);
page_cache_release(page);
balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
@@ -609,7 +529,8 @@ out_unlock:
return 0;
}
-static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
+static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
+ void __user *arg)
{
u64 new_size;
u64 old_size;
@@ -718,10 +639,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
struct btrfs_ioctl_vol_args *vol_args;
- struct btrfs_dir_item *di;
- struct btrfs_path *path;
struct file *src_file;
- u64 root_dirid;
int namelen;
int ret = 0;
@@ -739,32 +657,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
goto out;
}
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
- root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
- di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
- path, root_dirid,
- vol_args->name, namelen, 0);
- btrfs_free_path(path);
-
- if (di && !IS_ERR(di)) {
- ret = -EEXIST;
- goto out;
- }
-
- if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
- }
-
if (subvol) {
- ret = btrfs_mksubvol(&file->f_path, vol_args->name,
- file->f_path.dentry->d_inode->i_mode,
- namelen, NULL);
+ ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
+ NULL);
} else {
struct inode *src_inode;
src_file = fget(vol_args->fd);
@@ -781,17 +676,156 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
fput(src_file);
goto out;
}
- ret = btrfs_mksubvol(&file->f_path, vol_args->name,
- file->f_path.dentry->d_inode->i_mode,
- namelen, BTRFS_I(src_inode)->root);
+ ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
+ BTRFS_I(src_inode)->root);
fput(src_file);
}
-
out:
kfree(vol_args);
return ret;
}
+/*
+ * helper to check if the subvolume references other subvolumes
+ */
+static noinline int may_destroy_subvol(struct btrfs_root *root)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = root->root_key.objectid;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
+ &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret == 0);
+
+ ret = 0;
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid == root->root_key.objectid &&
+ key.type == BTRFS_ROOT_REF_KEY)
+ ret = -ENOTEMPTY;
+ }
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static noinline int btrfs_ioctl_snap_destroy(struct file *file,
+ void __user *arg)
+{
+ struct dentry *parent = fdentry(file);
+ struct dentry *dentry;
+ struct inode *dir = parent->d_inode;
+ struct inode *inode;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_root *dest = NULL;
+ struct btrfs_ioctl_vol_args *vol_args;
+ struct btrfs_trans_handle *trans;
+ int namelen;
+ int ret;
+ int err = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+
+ vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+ namelen = strlen(vol_args->name);
+ if (strchr(vol_args->name, '/') ||
+ strncmp(vol_args->name, "..", namelen) == 0) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = mnt_want_write(file->f_path.mnt);
+ if (err)
+ goto out;
+
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ dentry = lookup_one_len(vol_args->name, parent, namelen);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto out_unlock_dir;
+ }
+
+ if (!dentry->d_inode) {
+ err = -ENOENT;
+ goto out_dput;
+ }
+
+ inode = dentry->d_inode;
+ if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ err = -EINVAL;
+ goto out_dput;
+ }
+
+ dest = BTRFS_I(inode)->root;
+
+ mutex_lock(&inode->i_mutex);
+ err = d_invalidate(dentry);
+ if (err)
+ goto out_unlock;
+
+ down_write(&root->fs_info->subvol_sem);
+
+ err = may_destroy_subvol(dest);
+ if (err)
+ goto out_up_write;
+
+ trans = btrfs_start_transaction(root, 1);
+ ret = btrfs_unlink_subvol(trans, root, dir,
+ dest->root_key.objectid,
+ dentry->d_name.name,
+ dentry->d_name.len);
+ BUG_ON(ret);
+
+ btrfs_record_root_in_trans(trans, dest);
+
+ memset(&dest->root_item.drop_progress, 0,
+ sizeof(dest->root_item.drop_progress));
+ dest->root_item.drop_level = 0;
+ btrfs_set_root_refs(&dest->root_item, 0);
+
+ ret = btrfs_insert_orphan_item(trans,
+ root->fs_info->tree_root,
+ dest->root_key.objectid);
+ BUG_ON(ret);
+
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+ inode->i_flags |= S_DEAD;
+out_up_write:
+ up_write(&root->fs_info->subvol_sem);
+out_unlock:
+ mutex_unlock(&inode->i_mutex);
+ if (!err) {
+ btrfs_invalidate_inodes(dest);
+ d_delete(dentry);
+ }
+out_dput:
+ dput(dentry);
+out_unlock_dir:
+ mutex_unlock(&dir->i_mutex);
+ mnt_drop_write(file->f_path.mnt);
+out:
+ kfree(vol_args);
+ return err;
+}
+
static int btrfs_ioctl_defrag(struct file *file)
{
struct inode *inode = fdentry(file)->d_inode;
@@ -865,8 +899,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
return ret;
}
-static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
- u64 off, u64 olen, u64 destoff)
+static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
+ u64 off, u64 olen, u64 destoff)
{
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -976,7 +1010,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
/* punch hole in destination first */
btrfs_drop_extents(trans, root, inode, off, off + len,
- off + len, 0, &hint_byte);
+ off + len, 0, &hint_byte, 1);
/* clone data */
key.objectid = src->i_ino;
@@ -1071,8 +1105,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
datao += off - key.offset;
datal -= off - key.offset;
}
- if (key.offset + datao + datal + key.offset >
- off + len)
+ if (key.offset + datao + datal > off + len)
datal = off + len - key.offset - datao;
/* disko == 0 means it's a hole */
if (!disko)
@@ -1258,6 +1291,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_snap_create(file, argp, 0);
case BTRFS_IOC_SUBVOL_CREATE:
return btrfs_ioctl_snap_create(file, argp, 1);
+ case BTRFS_IOC_SNAP_DESTROY:
+ return btrfs_ioctl_snap_destroy(file, argp);
case BTRFS_IOC_DEFRAG:
return btrfs_ioctl_defrag(file);
case BTRFS_IOC_RESIZE:
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index b320b103fa1..bc49914475e 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -65,5 +65,6 @@ struct btrfs_ioctl_clone_range_args {
#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
struct btrfs_ioctl_vol_args)
-
+#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
+ struct btrfs_ioctl_vol_args)
#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7b2f401e604..b5d6d24726b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
*
* len is the length of the extent
*
- * This also sets the EXTENT_ORDERED bit on the range in the inode.
- *
* The tree is given a single reference on the ordered extent that was
* inserted.
*/
@@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->start = start;
entry->len = len;
entry->disk_len = disk_len;
+ entry->bytes_left = len;
entry->inode = inode;
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
@@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
&entry->rb_node);
BUG_ON(node);
- set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
- entry_end(entry) - 1, GFP_NOFS);
-
spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
list_add_tail(&entry->root_extent_list,
&BTRFS_I(inode)->root->fs_info->ordered_extents);
@@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
int ret;
tree = &BTRFS_I(inode)->ordered_tree;
mutex_lock(&tree->mutex);
- clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
- GFP_NOFS);
node = tree_search(tree, file_offset);
if (!node) {
ret = 1;
@@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
goto out;
}
- ret = test_range_bit(io_tree, entry->file_offset,
- entry->file_offset + entry->len - 1,
- EXTENT_ORDERED, 0);
- if (ret == 0)
+ if (io_size > entry->bytes_left) {
+ printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
+ (unsigned long long)entry->bytes_left,
+ (unsigned long long)io_size);
+ }
+ entry->bytes_left -= io_size;
+ if (entry->bytes_left == 0)
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+ else
+ ret = 1;
out:
mutex_unlock(&tree->mutex);
return ret == 0;
@@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
u64 orig_end;
u64 wait_end;
struct btrfs_ordered_extent *ordered;
+ int found;
if (start + len < start) {
orig_end = INT_LIMIT(loff_t);
@@ -502,6 +501,7 @@ again:
orig_end >> PAGE_CACHE_SHIFT);
end = orig_end;
+ found = 0;
while (1) {
ordered = btrfs_lookup_first_ordered_extent(inode, end);
if (!ordered)
@@ -514,6 +514,7 @@ again:
btrfs_put_ordered_extent(ordered);
break;
}
+ found++;
btrfs_start_ordered_extent(inode, ordered, 1);
end = ordered->file_offset;
btrfs_put_ordered_extent(ordered);
@@ -521,8 +522,8 @@ again:
break;
end--;
}
- if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
- EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
+ if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
+ EXTENT_DELALLOC, 0, NULL)) {
schedule_timeout(1);
goto again;
}
@@ -613,7 +614,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
*/
if (test_range_bit(io_tree, disk_i_size,
ordered->file_offset + ordered->len - 1,
- EXTENT_DELALLOC, 0)) {
+ EXTENT_DELALLOC, 0, NULL)) {
goto out;
}
/*
@@ -664,7 +665,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
*/
if (i_size_test > entry_end(ordered) &&
!test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
- EXTENT_DELALLOC, 0)) {
+ EXTENT_DELALLOC, 0, NULL)) {
new_i_size = min_t(u64, i_size_test, i_size_read(inode));
}
BTRFS_I(inode)->disk_i_size = new_i_size;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 3d31c8827b0..993a7ea45c7 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -85,6 +85,9 @@ struct btrfs_ordered_extent {
/* extent length on disk */
u64 disk_len;
+ /* number of bytes that still need writing */
+ u64 bytes_left;
+
/* flags (described above) */
unsigned long flags;
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 3c0d52af4f8..79cba5fbc28 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -65,3 +65,23 @@ out:
btrfs_free_path(path);
return ret;
}
+
+int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int ret;
+
+ key.objectid = BTRFS_ORPHAN_OBJECTID;
+ key.type = BTRFS_ORPHAN_ITEM_KEY;
+ key.offset = offset;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+
+ btrfs_free_path(path);
+ return ret;
+}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index c04f7f21260..361ad323faa 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -121,6 +121,15 @@ struct inodevec {
int nr;
};
+#define MAX_EXTENTS 128
+
+struct file_extent_cluster {
+ u64 start;
+ u64 end;
+ u64 boundary[MAX_EXTENTS];
+ unsigned int nr;
+};
+
struct reloc_control {
/* block group to relocate */
struct btrfs_block_group_cache *block_group;
@@ -2180,7 +2189,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize,
struct reloc_control *rc)
{
if (test_range_bit(&rc->processed_blocks, bytenr,
- bytenr + blocksize - 1, EXTENT_DIRTY, 1))
+ bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
return 1;
return 0;
}
@@ -2529,56 +2538,94 @@ out:
}
static noinline_for_stack
-int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
+int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
+ u64 block_start)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *em;
+ int ret = 0;
+
+ em = alloc_extent_map(GFP_NOFS);
+ if (!em)
+ return -ENOMEM;
+
+ em->start = start;
+ em->len = end + 1 - start;
+ em->block_len = em->len;
+ em->block_start = block_start;
+ em->bdev = root->fs_info->fs_devices->latest_bdev;
+ set_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+ lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+ while (1) {
+ write_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, em);
+ write_unlock(&em_tree->lock);
+ if (ret != -EEXIST) {
+ free_extent_map(em);
+ break;
+ }
+ btrfs_drop_extent_cache(inode, start, end, 0);
+ }
+ unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+ return ret;
+}
+
+static int relocate_file_extent_cluster(struct inode *inode,
+ struct file_extent_cluster *cluster)
{
u64 page_start;
u64 page_end;
- unsigned long i;
- unsigned long first_index;
+ u64 offset = BTRFS_I(inode)->index_cnt;
+ unsigned long index;
unsigned long last_index;
- unsigned int total_read = 0;
- unsigned int total_dirty = 0;
+ unsigned int dirty_page = 0;
struct page *page;
struct file_ra_state *ra;
- struct btrfs_ordered_extent *ordered;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ int nr = 0;
int ret = 0;
+ if (!cluster->nr)
+ return 0;
+
ra = kzalloc(sizeof(*ra), GFP_NOFS);
if (!ra)
return -ENOMEM;
+ index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
+ last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
+
mutex_lock(&inode->i_mutex);
- first_index = start >> PAGE_CACHE_SHIFT;
- last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
- /* make sure the dirty trick played by the caller work */
- while (1) {
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- first_index, last_index);
- if (ret != -EBUSY)
- break;
- schedule_timeout(HZ/10);
- }
+ i_size_write(inode, cluster->end + 1 - offset);
+ ret = setup_extent_mapping(inode, cluster->start - offset,
+ cluster->end - offset, cluster->start);
if (ret)
goto out_unlock;
file_ra_state_init(ra, inode->i_mapping);
- for (i = first_index ; i <= last_index; i++) {
- if (total_read % ra->ra_pages == 0) {
- btrfs_force_ra(inode->i_mapping, ra, NULL, i,
- min(last_index, ra->ra_pages + i - 1));
- }
- total_read++;
-again:
- if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
- BUG_ON(1);
- page = grab_cache_page(inode->i_mapping, i);
+ WARN_ON(cluster->start != cluster->boundary[0]);
+ while (index <= last_index) {
+ page = find_lock_page(inode->i_mapping, index);
if (!page) {
- ret = -ENOMEM;
- goto out_unlock;
+ page_cache_sync_readahead(inode->i_mapping,
+ ra, NULL, index,
+ last_index + 1 - index);
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+ }
+
+ if (PageReadahead(page)) {
+ page_cache_async_readahead(inode->i_mapping,
+ ra, NULL, page, index,
+ last_index + 1 - index);
}
+
if (!PageUptodate(page)) {
btrfs_readpage(NULL, page);
lock_page(page);
@@ -2589,75 +2636,79 @@ again:
goto out_unlock;
}
}
- wait_on_page_writeback(page);
page_start = (u64)page->index << PAGE_CACHE_SHIFT;
page_end = page_start + PAGE_CACHE_SIZE - 1;
- lock_extent(io_tree, page_start, page_end, GFP_NOFS);
-
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
- if (ordered) {
- unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
- unlock_page(page);
- page_cache_release(page);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- goto again;
- }
+
+ lock_extent(&BTRFS_I(inode)->io_tree,
+ page_start, page_end, GFP_NOFS);
+
set_page_extent_mapped(page);
- if (i == first_index)
- set_extent_bits(io_tree, page_start, page_end,
+ if (nr < cluster->nr &&
+ page_start + offset == cluster->boundary[nr]) {
+ set_extent_bits(&BTRFS_I(inode)->io_tree,
+ page_start, page_end,
EXTENT_BOUNDARY, GFP_NOFS);
+ nr++;
+ }
btrfs_set_extent_delalloc(inode, page_start, page_end);
set_page_dirty(page);
- total_dirty++;
+ dirty_page++;
- unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+ unlock_extent(&BTRFS_I(inode)->io_tree,
+ page_start, page_end, GFP_NOFS);
unlock_page(page);
page_cache_release(page);
+
+ index++;
+ if (nr < cluster->nr &&
+ page_end + 1 + offset == cluster->boundary[nr]) {
+ balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+ dirty_page);
+ dirty_page = 0;
+ }
+ }
+ if (dirty_page) {
+ balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+ dirty_page);
}
+ WARN_ON(nr != cluster->nr);
out_unlock:
mutex_unlock(&inode->i_mutex);
kfree(ra);
- balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
return ret;
}
static noinline_for_stack
-int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
+int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
+ struct file_extent_cluster *cluster)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct extent_map *em;
- u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
- u64 end = start + extent_key->offset - 1;
-
- em = alloc_extent_map(GFP_NOFS);
- em->start = start;
- em->len = extent_key->offset;
- em->block_len = extent_key->offset;
- em->block_start = extent_key->objectid;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
+ int ret;
- /* setup extent map to cheat btrfs_readpage */
- lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
- while (1) {
- int ret;
- spin_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, start, end, 0);
+ if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
+ ret = relocate_file_extent_cluster(inode, cluster);
+ if (ret)
+ return ret;
+ cluster->nr = 0;
}
- unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
- return relocate_inode_pages(inode, start, extent_key->offset);
+ if (!cluster->nr)
+ cluster->start = extent_key->objectid;
+ else
+ BUG_ON(cluster->nr >= MAX_EXTENTS);
+ cluster->end = extent_key->objectid + extent_key->offset - 1;
+ cluster->boundary[cluster->nr] = extent_key->objectid;
+ cluster->nr++;
+
+ if (cluster->nr >= MAX_EXTENTS) {
+ ret = relocate_file_extent_cluster(inode, cluster);
+ if (ret)
+ return ret;
+ cluster->nr = 0;
+ }
+ return 0;
}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
@@ -3203,10 +3254,12 @@ static int check_extent_flags(u64 flags)
return 0;
}
+
static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
{
struct rb_root blocks = RB_ROOT;
struct btrfs_key key;
+ struct file_extent_cluster *cluster;
struct btrfs_trans_handle *trans = NULL;
struct btrfs_path *path;
struct btrfs_extent_item *ei;
@@ -3216,10 +3269,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
int ret;
int err = 0;
+ cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
+ if (!cluster)
+ return -ENOMEM;
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
+ rc->extents_found = 0;
+ rc->extents_skipped = 0;
+
rc->search_start = rc->block_group->key.objectid;
clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
GFP_NOFS);
@@ -3306,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
nr = trans->blocks_used;
- btrfs_end_transaction_throttle(trans, rc->extent_root);
+ btrfs_end_transaction(trans, rc->extent_root);
trans = NULL;
btrfs_btree_balance_dirty(rc->extent_root, nr);
if (rc->stage == MOVE_DATA_EXTENTS &&
(flags & BTRFS_EXTENT_FLAG_DATA)) {
rc->found_file_extent = 1;
- ret = relocate_data_extent(rc->data_inode, &key);
+ ret = relocate_data_extent(rc->data_inode,
+ &key, cluster);
if (ret < 0) {
err = ret;
break;
@@ -3328,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
btrfs_btree_balance_dirty(rc->extent_root, nr);
}
+ if (!err) {
+ ret = relocate_file_extent_cluster(rc->data_inode, cluster);
+ if (ret < 0)
+ err = ret;
+ }
+
+ kfree(cluster);
+
rc->create_reloc_root = 0;
smp_mb();
@@ -3348,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 objectid, u64 size)
+ struct btrfs_root *root, u64 objectid)
{
struct btrfs_path *path;
struct btrfs_inode_item *item;
@@ -3368,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
btrfs_set_inode_generation(leaf, item, 1);
- btrfs_set_inode_size(leaf, item, size);
+ btrfs_set_inode_size(leaf, item, 0);
btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
btrfs_mark_buffer_dirty(leaf);
@@ -3404,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
if (err)
goto out;
- err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
- BUG_ON(err);
-
- err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
- group->key.offset, 0, group->key.offset,
- 0, 0, 0);
+ err = __insert_orphan_inode(trans, root, objectid);
BUG_ON(err);
key.objectid = objectid;
@@ -3475,14 +3538,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
btrfs_wait_ordered_extents(fs_info->tree_root, 0);
while (1) {
- mutex_lock(&fs_info->cleaner_mutex);
- btrfs_clean_old_snapshots(fs_info->tree_root);
- mutex_unlock(&fs_info->cleaner_mutex);
-
rc->extents_found = 0;
rc->extents_skipped = 0;
+ mutex_lock(&fs_info->cleaner_mutex);
+
+ btrfs_clean_old_snapshots(fs_info->tree_root);
ret = relocate_block_group(rc);
+
+ mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) {
err = ret;
break;
@@ -3514,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
}
}
- filemap_fdatawrite_range(fs_info->btree_inode->i_mapping,
- rc->block_group->key.objectid,
- rc->block_group->key.objectid +
- rc->block_group->key.offset - 1);
+ filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
+ rc->block_group->key.objectid,
+ rc->block_group->key.objectid +
+ rc->block_group->key.offset - 1);
WARN_ON(rc->block_group->pinned > 0);
WARN_ON(rc->block_group->reserved > 0);
@@ -3530,6 +3594,26 @@ out:
return err;
}
+static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
+{
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ trans = btrfs_start_transaction(root->fs_info->tree_root, 1);
+
+ memset(&root->root_item.drop_progress, 0,
+ sizeof(root->root_item.drop_progress));
+ root->root_item.drop_level = 0;
+ btrfs_set_root_refs(&root->root_item, 0);
+ ret = btrfs_update_root(trans, root->fs_info->tree_root,
+ &root->root_key, &root->root_item);
+ BUG_ON(ret);
+
+ ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
+ BUG_ON(ret);
+ return 0;
+}
+
/*
* recover relocation interrupted by system crash.
*
@@ -3589,8 +3673,12 @@ int btrfs_recover_relocation(struct btrfs_root *root)
fs_root = read_fs_root(root->fs_info,
reloc_root->root_key.offset);
if (IS_ERR(fs_root)) {
- err = PTR_ERR(fs_root);
- goto out;
+ ret = PTR_ERR(fs_root);
+ if (ret != -ENOENT) {
+ err = ret;
+ goto out;
+ }
+ mark_garbage_root(reloc_root);
}
}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 0ddc6d61c55..9351428f30e 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -94,17 +94,23 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
goto out;
BUG_ON(ret == 0);
+ if (path->slots[0] == 0) {
+ ret = 1;
+ goto out;
+ }
l = path->nodes[0];
- BUG_ON(path->slots[0] == 0);
slot = path->slots[0] - 1;
btrfs_item_key_to_cpu(l, &found_key, slot);
- if (found_key.objectid != objectid) {
+ if (found_key.objectid != objectid ||
+ found_key.type != BTRFS_ROOT_ITEM_KEY) {
ret = 1;
goto out;
}
- read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
- sizeof(*item));
- memcpy(key, &found_key, sizeof(found_key));
+ if (item)
+ read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
+ sizeof(*item));
+ if (key)
+ memcpy(key, &found_key, sizeof(found_key));
ret = 0;
out:
btrfs_free_path(path);
@@ -249,6 +255,59 @@ err:
return ret;
}
+int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int err = 0;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = BTRFS_ORPHAN_OBJECTID;
+ key.type = BTRFS_ORPHAN_ITEM_KEY;
+ key.offset = 0;
+
+ while (1) {
+ ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+ if (ret < 0) {
+ err = ret;
+ break;
+ }
+
+ leaf = path->nodes[0];
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(tree_root, path);
+ if (ret < 0)
+ err = ret;
+ if (ret != 0)
+ break;
+ leaf = path->nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ btrfs_release_path(tree_root, path);
+
+ if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
+ key.type != BTRFS_ORPHAN_ITEM_KEY)
+ break;
+
+ ret = btrfs_find_dead_roots(tree_root, key.offset);
+ if (ret) {
+ err = ret;
+ break;
+ }
+
+ key.offset++;
+ }
+
+ btrfs_free_path(path);
+ return err;
+}
+
/* drop the root item for 'key' from 'root' */
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_key *key)
@@ -278,31 +337,57 @@ out:
return ret;
}
-#if 0 /* this will get used when snapshot deletion is implemented */
int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
- u64 root_id, u8 type, u64 ref_id)
+ u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
+ const char *name, int name_len)
+
{
+ struct btrfs_path *path;
+ struct btrfs_root_ref *ref;
+ struct extent_buffer *leaf;
struct btrfs_key key;
+ unsigned long ptr;
+ int err = 0;
int ret;
- struct btrfs_path *path;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
key.objectid = root_id;
- key.type = type;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = ref_id;
-
+again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
- BUG_ON(ret);
-
- ret = btrfs_del_item(trans, tree_root, path);
- BUG_ON(ret);
+ BUG_ON(ret < 0);
+ if (ret == 0) {
+ leaf = path->nodes[0];
+ ref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_root_ref);
+
+ WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
+ WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
+ ptr = (unsigned long)(ref + 1);
+ WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
+ *sequence = btrfs_root_ref_sequence(leaf, ref);
+
+ ret = btrfs_del_item(trans, tree_root, path);
+ BUG_ON(ret);
+ } else
+ err = -ENOENT;
+
+ if (key.type == BTRFS_ROOT_BACKREF_KEY) {
+ btrfs_release_path(tree_root, path);
+ key.objectid = ref_id;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = root_id;
+ goto again;
+ }
btrfs_free_path(path);
- return ret;
+ return err;
}
-#endif
int btrfs_find_root_ref(struct btrfs_root *tree_root,
struct btrfs_path *path,
@@ -319,7 +404,6 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root,
return ret;
}
-
/*
* add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
* or BTRFS_ROOT_BACKREF_KEY.
@@ -335,8 +419,7 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root,
*/
int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
- u64 root_id, u8 type, u64 ref_id,
- u64 dirid, u64 sequence,
+ u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
const char *name, int name_len)
{
struct btrfs_key key;
@@ -346,13 +429,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
unsigned long ptr;
-
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
key.objectid = root_id;
- key.type = type;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = ref_id;
-
+again:
ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
sizeof(*ref) + name_len);
BUG_ON(ret);
@@ -366,6 +450,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
write_extent_buffer(leaf, name, ptr, name_len);
btrfs_mark_buffer_dirty(leaf);
+ if (key.type == BTRFS_ROOT_BACKREF_KEY) {
+ btrfs_release_path(tree_root, path);
+ key.objectid = ref_id;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = root_id;
+ goto again;
+ }
+
btrfs_free_path(path);
- return ret;
+ return 0;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2db17cd66fc..67035385444 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -676,6 +676,7 @@ static int btrfs_unfreeze(struct super_block *sb)
}
static const struct super_operations btrfs_super_ops = {
+ .drop_inode = btrfs_drop_inode,
.delete_inode = btrfs_delete_inode,
.put_super = btrfs_put_super,
.sync_fs = btrfs_sync_fs,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index cdbb5022da5..88f866f85e7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -104,7 +104,6 @@ static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
{
if (root->ref_cows && root->last_trans < trans->transid) {
WARN_ON(root == root->fs_info->extent_root);
- WARN_ON(root->root_item.refs == 0);
WARN_ON(root->commit_root != root->node);
radix_tree_tag_set(&root->fs_info->fs_roots_radix,
@@ -720,7 +719,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
key.objectid = objectid;
- key.offset = 0;
+ /* record when the snapshot was created in key.offset */
+ key.offset = trans->transid;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
old = btrfs_lock_root_node(root);
@@ -778,24 +778,14 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
ret = btrfs_update_inode(trans, parent_root, parent_inode);
BUG_ON(ret);
- /* add the backref first */
ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
pending->root_key.objectid,
- BTRFS_ROOT_BACKREF_KEY,
parent_root->root_key.objectid,
parent_inode->i_ino, index, pending->name,
namelen);
BUG_ON(ret);
- /* now add the forward ref */
- ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
- parent_root->root_key.objectid,
- BTRFS_ROOT_REF_KEY,
- pending->root_key.objectid,
- parent_inode->i_ino, index, pending->name,
- namelen);
-
inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
d_instantiate(pending->dentry, inode);
fail:
@@ -874,7 +864,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
unsigned long timeout = 1;
struct btrfs_transaction *cur_trans;
struct btrfs_transaction *prev_trans = NULL;
- struct extent_io_tree *pinned_copy;
DEFINE_WAIT(wait);
int ret;
int should_grow = 0;
@@ -915,13 +904,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
return 0;
}
- pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
- if (!pinned_copy)
- return -ENOMEM;
-
- extent_io_tree_init(pinned_copy,
- root->fs_info->btree_inode->i_mapping, GFP_NOFS);
-
trans->transaction->in_commit = 1;
trans->transaction->blocked = 1;
if (cur_trans->list.prev != &root->fs_info->trans_list) {
@@ -1019,6 +1001,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
ret = commit_cowonly_roots(trans, root);
BUG_ON(ret);
+ btrfs_prepare_extent_commit(trans, root);
+
cur_trans = root->fs_info->running_transaction;
spin_lock(&root->fs_info->new_trans_lock);
root->fs_info->running_transaction = NULL;
@@ -1042,8 +1026,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
sizeof(root->fs_info->super_copy));
- btrfs_copy_pinned(root, pinned_copy);
-
trans->transaction->blocked = 0;
wake_up(&root->fs_info->transaction_wait);
@@ -1059,8 +1041,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
*/
mutex_unlock(&root->fs_info->tree_log_mutex);
- btrfs_finish_extent_commit(trans, root, pinned_copy);
- kfree(pinned_copy);
+ btrfs_finish_extent_commit(trans, root);
/* do the directory inserts of any pending snapshot creations */
finish_pending_snapshots(trans, root->fs_info);
@@ -1096,8 +1077,13 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
while (!list_empty(&list)) {
root = list_entry(list.next, struct btrfs_root, root_list);
- list_del_init(&root->root_list);
- btrfs_drop_snapshot(root, 0);
+ list_del(&root->root_list);
+
+ if (btrfs_header_backref_rev(root->node) <
+ BTRFS_MIXED_BACKREF_REV)
+ btrfs_drop_snapshot(root, 0);
+ else
+ btrfs_drop_snapshot(root, 1);
}
return 0;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 30c0d45c1b5..7827841b55c 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -263,8 +263,8 @@ static int process_one_buffer(struct btrfs_root *log,
struct walk_control *wc, u64 gen)
{
if (wc->pin)
- btrfs_update_pinned_extents(log->fs_info->extent_root,
- eb->start, eb->len, 1);
+ btrfs_pin_extent(log->fs_info->extent_root,
+ eb->start, eb->len, 0);
if (btrfs_buffer_uptodate(eb, gen)) {
if (wc->write)
@@ -534,7 +534,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
saved_nbytes = inode_get_bytes(inode);
/* drop any overlapping extents */
ret = btrfs_drop_extents(trans, root, inode,
- start, extent_end, extent_end, start, &alloc_hint);
+ start, extent_end, extent_end, start, &alloc_hint, 1);
BUG_ON(ret);
if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -2841,7 +2841,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
break;
- if (parent == sb->s_root)
+ if (IS_ROOT(parent))
break;
parent = parent->d_parent;
@@ -2880,6 +2880,12 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
goto end_no_trans;
}
+ if (root != BTRFS_I(inode)->root ||
+ btrfs_root_refs(&root->root_item) == 0) {
+ ret = 1;
+ goto end_no_trans;
+ }
+
ret = check_parent_dirs_for_sync(trans, inode, parent,
sb, last_committed);
if (ret)
@@ -2907,12 +2913,15 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
break;
inode = parent->d_inode;
+ if (root != BTRFS_I(inode)->root)
+ break;
+
if (BTRFS_I(inode)->generation >
root->fs_info->last_trans_committed) {
ret = btrfs_log_inode(trans, root, inode, inode_only);
BUG_ON(ret);
}
- if (parent == sb->s_root)
+ if (IS_ROOT(parent))
break;
parent = parent->d_parent;
@@ -2951,7 +2960,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
struct btrfs_key tmp_key;
struct btrfs_root *log;
struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
- u64 highest_inode;
struct walk_control wc = {
.process_func = process_one_buffer,
.stage = 0,
@@ -3010,11 +3018,6 @@ again:
path);
BUG_ON(ret);
}
- ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode);
- if (ret == 0) {
- wc.replay_dest->highest_inode = highest_inode;
- wc.replay_dest->last_inode_alloc = highest_inode;
- }
key.offset = found_key.offset - 1;
wc.replay_dest->log_root = NULL;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5cf405b0828..23e7d36ff32 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -276,7 +276,7 @@ loop_lock:
* is now congested. Back off and let other work structs
* run instead
*/
- if (pending && bdi_write_congested(bdi) && batch_run > 32 &&
+ if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
fs_info->fs_devices->open_devices > 1) {
struct io_context *ioc;
@@ -719,10 +719,9 @@ error:
* called very infrequently and that a given device has a small number
* of extents
*/
-static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device,
- u64 num_bytes, u64 *start,
- u64 *max_avail)
+int find_free_dev_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device, u64 num_bytes,
+ u64 *start, u64 *max_avail)
{
struct btrfs_key key;
struct btrfs_root *root = device->dev_root;
@@ -1736,6 +1735,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
extent_root = root->fs_info->extent_root;
em_tree = &root->fs_info->mapping_tree.map_tree;
+ ret = btrfs_can_relocate(extent_root, chunk_offset);
+ if (ret)
+ return -ENOSPC;
+
/* step one, relocate all the extents inside this chunk */
ret = btrfs_relocate_block_group(extent_root, chunk_offset);
BUG_ON(ret);
@@ -1749,9 +1752,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
* step two, delete the device extents and the
* chunk tree entries
*/
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, chunk_offset, 1);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
BUG_ON(em->start > chunk_offset ||
em->start + em->len < chunk_offset);
@@ -1780,9 +1783,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
BUG_ON(ret);
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
remove_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
kfree(map);
em->bdev = NULL;
@@ -1807,12 +1810,15 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
struct btrfs_key found_key;
u64 chunk_tree = chunk_root->root_key.objectid;
u64 chunk_type;
+ bool retried = false;
+ int failed = 0;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
+again:
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
key.offset = (u64)-1;
key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -1842,7 +1848,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
found_key.objectid,
found_key.offset);
- BUG_ON(ret);
+ if (ret == -ENOSPC)
+ failed++;
+ else if (ret)
+ BUG();
}
if (found_key.offset == 0)
@@ -1850,6 +1859,14 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
key.offset = found_key.offset - 1;
}
ret = 0;
+ if (failed && !retried) {
+ failed = 0;
+ retried = true;
+ goto again;
+ } else if (failed && retried) {
+ WARN_ON(1);
+ ret = -ENOSPC;
+ }
error:
btrfs_free_path(path);
return ret;
@@ -1894,6 +1911,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
continue;
ret = btrfs_shrink_device(device, old_size - size_to_free);
+ if (ret == -ENOSPC)
+ break;
BUG_ON(ret);
trans = btrfs_start_transaction(dev_root, 1);
@@ -1938,9 +1957,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
chunk = btrfs_item_ptr(path->nodes[0],
path->slots[0],
struct btrfs_chunk);
- key.offset = found_key.offset;
/* chunk zero is special */
- if (key.offset == 0)
+ if (found_key.offset == 0)
break;
btrfs_release_path(chunk_root, path);
@@ -1948,7 +1966,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
chunk_root->root_key.objectid,
found_key.objectid,
found_key.offset);
- BUG_ON(ret);
+ BUG_ON(ret && ret != -ENOSPC);
+ key.offset = found_key.offset - 1;
}
ret = 0;
error:
@@ -1974,10 +1993,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
u64 chunk_offset;
int ret;
int slot;
+ int failed = 0;
+ bool retried = false;
struct extent_buffer *l;
struct btrfs_key key;
struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
u64 old_total = btrfs_super_total_bytes(super_copy);
+ u64 old_size = device->total_bytes;
u64 diff = device->total_bytes - new_size;
if (new_size >= device->total_bytes)
@@ -1987,12 +2009,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
if (!path)
return -ENOMEM;
- trans = btrfs_start_transaction(root, 1);
- if (!trans) {
- ret = -ENOMEM;
- goto done;
- }
-
path->reada = 2;
lock_chunks(root);
@@ -2001,8 +2017,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
if (device->writeable)
device->fs_devices->total_rw_bytes -= diff;
unlock_chunks(root);
- btrfs_end_transaction(trans, root);
+again:
key.objectid = device->devid;
key.offset = (u64)-1;
key.type = BTRFS_DEV_EXTENT_KEY;
@@ -2017,6 +2033,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
goto done;
if (ret) {
ret = 0;
+ btrfs_release_path(root, path);
break;
}
@@ -2024,14 +2041,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
slot = path->slots[0];
btrfs_item_key_to_cpu(l, &key, path->slots[0]);
- if (key.objectid != device->devid)
+ if (key.objectid != device->devid) {
+ btrfs_release_path(root, path);
break;
+ }
dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
length = btrfs_dev_extent_length(l, dev_extent);
- if (key.offset + length <= new_size)
+ if (key.offset + length <= new_size) {
+ btrfs_release_path(root, path);
break;
+ }
chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -2040,8 +2061,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
chunk_offset);
- if (ret)
+ if (ret && ret != -ENOSPC)
goto done;
+ if (ret == -ENOSPC)
+ failed++;
+ key.offset -= 1;
+ }
+
+ if (failed && !retried) {
+ failed = 0;
+ retried = true;
+ goto again;
+ } else if (failed && retried) {
+ ret = -ENOSPC;
+ lock_chunks(root);
+
+ device->total_bytes = old_size;
+ if (device->writeable)
+ device->fs_devices->total_rw_bytes += diff;
+ unlock_chunks(root);
+ goto done;
}
/* Shrinking succeeded, else we would be at "done". */
@@ -2294,9 +2333,9 @@ again:
em->block_len = em->len;
em_tree = &extent_root->fs_info->mapping_tree.map_tree;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
BUG_ON(ret);
free_extent_map(em);
@@ -2491,9 +2530,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
int readonly = 0;
int i;
- spin_lock(&map_tree->map_tree.lock);
+ read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
- spin_unlock(&map_tree->map_tree.lock);
+ read_unlock(&map_tree->map_tree.lock);
if (!em)
return 1;
@@ -2518,11 +2557,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
struct extent_map *em;
while (1) {
- spin_lock(&tree->map_tree.lock);
+ write_lock(&tree->map_tree.lock);
em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
if (em)
remove_extent_mapping(&tree->map_tree, em);
- spin_unlock(&tree->map_tree.lock);
+ write_unlock(&tree->map_tree.lock);
if (!em)
break;
kfree(em->bdev);
@@ -2540,9 +2579,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
struct extent_map_tree *em_tree = &map_tree->map_tree;
int ret;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, len);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
@@ -2604,9 +2643,9 @@ again:
atomic_set(&multi->error, 0);
}
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, *length);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (!em && unplug_page)
return 0;
@@ -2763,9 +2802,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
u64 stripe_nr;
int i, j, nr = 0;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, chunk_start, 1);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
BUG_ON(!em || em->start != chunk_start);
map = (struct map_lookup *)em->bdev;
@@ -3053,9 +3092,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
- spin_lock(&map_tree->map_tree.lock);
+ read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
- spin_unlock(&map_tree->map_tree.lock);
+ read_unlock(&map_tree->map_tree.lock);
/* already mapped? */
if (em && em->start <= logical && em->start + em->len > logical) {
@@ -3114,9 +3153,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
map->stripes[i].dev->in_fs_metadata = 1;
}
- spin_lock(&map_tree->map_tree.lock);
+ write_lock(&map_tree->map_tree.lock);
ret = add_extent_mapping(&map_tree->map_tree, em);
- spin_unlock(&map_tree->map_tree.lock);
+ write_unlock(&map_tree->map_tree.lock);
BUG_ON(ret);
free_extent_map(em);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5139a833f72..31b0fabdd2e 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -181,4 +181,7 @@ int btrfs_balance(struct btrfs_root *dev_root);
void btrfs_unlock_volumes(void);
void btrfs_lock_volumes(void);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
+int find_free_dev_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device, u64 num_bytes,
+ u64 *start, u64 *max_avail);
#endif
diff --git a/fs/buffer.c b/fs/buffer.c
index 209f7f15f5f..24afd7422ae 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2239,16 +2239,10 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
struct address_space *mapping = inode->i_mapping;
struct page *page;
void *fsdata;
- unsigned long limit;
int err;
- err = -EFBIG;
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && size > (loff_t)limit) {
- send_sig(SIGXFSZ, current, 0);
- goto out;
- }
- if (size > inode->i_sb->s_maxbytes)
+ err = inode_newsize_ok(inode, size);
+ if (err)
goto out;
err = pagecache_write_begin(NULL, mapping, size, 0,
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 3cbc57f932d..d6db933df2b 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -264,7 +264,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
{
struct char_device_struct *cd;
struct cdev *cdev;
- char *s;
int err = -ENOMEM;
cd = __register_chrdev_region(major, baseminor, count, name);
@@ -278,8 +277,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
cdev->owner = fops->owner;
cdev->ops = fops;
kobject_set_name(&cdev->kobj, "%s", name);
- for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/'))
- *s = '!';
err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
if (err)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d79ce2e95c2..90c5b39f031 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -185,8 +185,7 @@ out_mount_failed:
cifs_sb->mountdata = NULL;
}
#endif
- if (cifs_sb->local_nls)
- unload_nls(cifs_sb->local_nls);
+ unload_nls(cifs_sb->local_nls);
kfree(cifs_sb);
}
return rc;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1f09c761931..5e2492535da 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1557,57 +1557,24 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
static int cifs_vmtruncate(struct inode *inode, loff_t offset)
{
- struct address_space *mapping = inode->i_mapping;
- unsigned long limit;
+ loff_t oldsize;
+ int err;
spin_lock(&inode->i_lock);
- if (inode->i_size < offset)
- goto do_expand;
- /*
- * truncation of in-use swapfiles is disallowed - it would cause
- * subsequent swapout to scribble on the now-freed blocks.
- */
- if (IS_SWAPFILE(inode)) {
- spin_unlock(&inode->i_lock);
- goto out_busy;
- }
- i_size_write(inode, offset);
- spin_unlock(&inode->i_lock);
- /*
- * unmap_mapping_range is called twice, first simply for efficiency
- * so that truncate_inode_pages does fewer single-page unmaps. However
- * after this first call, and before truncate_inode_pages finishes,
- * it is possible for private pages to be COWed, which remain after
- * truncate_inode_pages finishes, hence the second unmap_mapping_range
- * call must be made for correctness.
- */
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(mapping, offset);
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- goto out_truncate;
-
-do_expand:
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && offset > limit) {
+ err = inode_newsize_ok(inode, offset);
+ if (err) {
spin_unlock(&inode->i_lock);
- goto out_sig;
- }
- if (offset > inode->i_sb->s_maxbytes) {
- spin_unlock(&inode->i_lock);
- goto out_big;
+ goto out;
}
+
+ oldsize = inode->i_size;
i_size_write(inode, offset);
spin_unlock(&inode->i_lock);
-out_truncate:
+ truncate_pagecache(inode, oldsize, offset);
if (inode->i_op->truncate)
inode->i_op->truncate(inode);
- return 0;
-out_sig:
- send_sig(SIGXFSZ, current, 0);
-out_big:
- return -EFBIG;
-out_busy:
- return -ETXTBSY;
+out:
+ return err;
}
static int
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h
index 8ccd5ed81d9..d99860a3389 100644
--- a/fs/coda/coda_int.h
+++ b/fs/coda/coda_int.h
@@ -2,6 +2,7 @@
#define _CODA_INT_
struct dentry;
+struct file;
extern struct file_system_type coda_fs_type;
extern unsigned long coda_timeout;
diff --git a/fs/compat.c b/fs/compat.c
index 3aa48834a22..d576b552e8e 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -768,13 +768,13 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
char __user * type, unsigned long flags,
void __user * data)
{
- unsigned long type_page;
+ char *kernel_type;
unsigned long data_page;
- unsigned long dev_page;
+ char *kernel_dev;
char *dir_page;
int retval;
- retval = copy_mount_options (type, &type_page);
+ retval = copy_mount_string(type, &kernel_type);
if (retval < 0)
goto out;
@@ -783,38 +783,38 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
if (IS_ERR(dir_page))
goto out1;
- retval = copy_mount_options (dev_name, &dev_page);
+ retval = copy_mount_string(dev_name, &kernel_dev);
if (retval < 0)
goto out2;
- retval = copy_mount_options (data, &data_page);
+ retval = copy_mount_options(data, &data_page);
if (retval < 0)
goto out3;
retval = -EINVAL;
- if (type_page && data_page) {
- if (!strcmp((char *)type_page, SMBFS_NAME)) {
+ if (kernel_type && data_page) {
+ if (!strcmp(kernel_type, SMBFS_NAME)) {
do_smb_super_data_conv((void *)data_page);
- } else if (!strcmp((char *)type_page, NCPFS_NAME)) {
+ } else if (!strcmp(kernel_type, NCPFS_NAME)) {
do_ncp_super_data_conv((void *)data_page);
- } else if (!strcmp((char *)type_page, NFS4_NAME)) {
+ } else if (!strcmp(kernel_type, NFS4_NAME)) {
if (do_nfs4_super_data_conv((void *) data_page))
goto out4;
}
}
- retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
+ retval = do_mount(kernel_dev, dir_page, kernel_type,
flags, (void*)data_page);
out4:
free_page(data_page);
out3:
- free_page(dev_page);
+ kfree(kernel_dev);
out2:
putname(dir_page);
out1:
- free_page(type_page);
+ kfree(kernel_type);
out:
return retval;
}
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index a2edb791344..31f4b0e6d72 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -63,9 +63,9 @@ static void drop_slab(void)
}
int drop_caches_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ proc_dointvec_minmax(table, write, buffer, length, ppos);
if (write) {
if (sysctl_drop_caches & 1)
drop_pagecache();
diff --git a/fs/exec.c b/fs/exec.c
index 5c833c18d0d..d49be6bc179 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
#include <linux/kmod.h>
#include <linux/fsnotify.h>
#include <linux/fs_struct.h>
+#include <linux/pipe_fs_i.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -63,6 +64,7 @@
int core_uses_pid;
char core_pattern[CORENAME_MAX_SIZE] = "core";
+unsigned int core_pipe_limit;
int suid_dumpable = 0;
/* The maximal length of core_pattern is also specified in sysctl.c */
@@ -1393,18 +1395,16 @@ out_ret:
return retval;
}
-int set_binfmt(struct linux_binfmt *new)
+void set_binfmt(struct linux_binfmt *new)
{
- struct linux_binfmt *old = current->binfmt;
+ struct mm_struct *mm = current->mm;
- if (new) {
- if (!try_module_get(new->module))
- return -1;
- }
- current->binfmt = new;
- if (old)
- module_put(old->module);
- return 0;
+ if (mm->binfmt)
+ module_put(mm->binfmt->module);
+
+ mm->binfmt = new;
+ if (new)
+ __module_get(new->module);
}
EXPORT_SYMBOL(set_binfmt);
@@ -1728,6 +1728,29 @@ int get_dumpable(struct mm_struct *mm)
return (ret >= 2) ? 2 : ret;
}
+static void wait_for_dump_helpers(struct file *file)
+{
+ struct pipe_inode_info *pipe;
+
+ pipe = file->f_path.dentry->d_inode->i_pipe;
+
+ pipe_lock(pipe);
+ pipe->readers++;
+ pipe->writers--;
+
+ while ((pipe->readers > 1) && (!signal_pending(current))) {
+ wake_up_interruptible_sync(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ pipe_wait(pipe);
+ }
+
+ pipe->readers--;
+ pipe->writers++;
+ pipe_unlock(pipe);
+
+}
+
+
void do_coredump(long signr, int exit_code, struct pt_regs *regs)
{
struct core_state core_state;
@@ -1744,11 +1767,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
char **helper_argv = NULL;
int helper_argc = 0;
- char *delimit;
+ int dump_count = 0;
+ static atomic_t core_dump_count = ATOMIC_INIT(0);
audit_core_dumps(signr);
- binfmt = current->binfmt;
+ binfmt = mm->binfmt;
if (!binfmt || !binfmt->core_dump)
goto fail;
@@ -1799,54 +1823,63 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
lock_kernel();
ispipe = format_corename(corename, signr);
unlock_kernel();
- /*
- * Don't bother to check the RLIMIT_CORE value if core_pattern points
- * to a pipe. Since we're not writing directly to the filesystem
- * RLIMIT_CORE doesn't really apply, as no actual core file will be
- * created unless the pipe reader choses to write out the core file
- * at which point file size limits and permissions will be imposed
- * as it does with any other process
- */
+
if ((!ispipe) && (core_limit < binfmt->min_coredump))
goto fail_unlock;
if (ispipe) {
+ if (core_limit == 0) {
+ /*
+ * Normally core limits are irrelevant to pipes, since
+ * we're not writing to the file system, but we use
+ * core_limit of 0 here as a special value. Any
+ * non-zero limit gets set to RLIM_INFINITY below, but
+ * a limit of 0 skips the dump. This is a consistent
+ * way to catch recursive crashes. We can still crash
+ * if the core_pattern binary sets RLIMIT_CORE = !0
+ * but it runs as root, and can do lots of stupid things.
+ * Note that we use task_tgid_vnr here to grab the pid
+ * of the process group leader. That way we get the
+ * right pid if a thread in a multi-threaded
+ * core_pattern process dies.
+ */
+ printk(KERN_WARNING
+ "Process %d(%s) has RLIMIT_CORE set to 0\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_unlock;
+ }
+
+ dump_count = atomic_inc_return(&core_dump_count);
+ if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+ printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Skipping core dump\n");
+ goto fail_dropcount;
+ }
+
helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
if (!helper_argv) {
printk(KERN_WARNING "%s failed to allocate memory\n",
__func__);
- goto fail_unlock;
- }
- /* Terminate the string before the first option */
- delimit = strchr(corename, ' ');
- if (delimit)
- *delimit = '\0';
- delimit = strrchr(helper_argv[0], '/');
- if (delimit)
- delimit++;
- else
- delimit = helper_argv[0];
- if (!strcmp(delimit, current->comm)) {
- printk(KERN_NOTICE "Recursive core dump detected, "
- "aborting\n");
- goto fail_unlock;
+ goto fail_dropcount;
}
core_limit = RLIM_INFINITY;
/* SIGPIPE can happen, but it's just never processed */
- if (call_usermodehelper_pipe(corename+1, helper_argv, NULL,
+ if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
&file)) {
printk(KERN_INFO "Core dump to %s pipe failed\n",
corename);
- goto fail_unlock;
+ goto fail_dropcount;
}
} else
file = filp_open(corename,
O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
0600);
if (IS_ERR(file))
- goto fail_unlock;
+ goto fail_dropcount;
inode = file->f_path.dentry->d_inode;
if (inode->i_nlink > 1)
goto close_fail; /* multiple links - don't dump */
@@ -1875,7 +1908,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
if (retval)
current->signal->group_exit_code |= 0x80;
close_fail:
+ if (ispipe && core_pipe_limit)
+ wait_for_dump_helpers(file);
filp_close(file, NULL);
+fail_dropcount:
+ if (dump_count)
+ atomic_dec(&core_dump_count);
fail_unlock:
if (helper_argv)
argv_free(helper_argv);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 5ab10c3bbeb..9f500dec3b5 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -214,7 +214,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
}
lock_super(sb);
- lock_kernel();
sbi = sb->s_fs_info;
fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
@@ -245,7 +244,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
out:
if (or)
osd_end_request(or);
- unlock_kernel();
unlock_super(sb);
kfree(fscb);
return ret;
@@ -268,8 +266,6 @@ static void exofs_put_super(struct super_block *sb)
int num_pend;
struct exofs_sb_info *sbi = sb->s_fs_info;
- lock_kernel();
-
if (sb->s_dirt)
exofs_write_super(sb);
@@ -286,8 +282,6 @@ static void exofs_put_super(struct super_block *sb)
osduld_put_device(sbi->s_dev);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
-
- unlock_kernel();
}
/*
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 1c1638f873a..ade634076d0 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -819,6 +819,7 @@ const struct address_space_operations ext2_aops = {
.writepages = ext2_writepages,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
const struct address_space_operations ext2_aops_xip = {
@@ -837,6 +838,7 @@ const struct address_space_operations ext2_nobh_aops = {
.direct_IO = ext2_direct_IO,
.writepages = ext2_writepages,
.migratepage = buffer_migrate_page,
+ .error_remove_page = generic_error_remove_page,
};
/*
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index cd098a7b77f..acf1b142332 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1830,6 +1830,7 @@ static const struct address_space_operations ext3_ordered_aops = {
.direct_IO = ext3_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations ext3_writeback_aops = {
@@ -1845,6 +1846,7 @@ static const struct address_space_operations ext3_writeback_aops = {
.direct_IO = ext3_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations ext3_journalled_aops = {
@@ -1859,6 +1861,7 @@ static const struct address_space_operations ext3_journalled_aops = {
.invalidatepage = ext3_invalidatepage,
.releasepage = ext3_releasepage,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
void ext3_set_aops(struct inode *inode)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3a798737e30..064746fad58 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3386,6 +3386,7 @@ static const struct address_space_operations ext4_ordered_aops = {
.direct_IO = ext4_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations ext4_writeback_aops = {
@@ -3401,6 +3402,7 @@ static const struct address_space_operations ext4_writeback_aops = {
.direct_IO = ext4_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations ext4_journalled_aops = {
@@ -3415,6 +3417,7 @@ static const struct address_space_operations ext4_journalled_aops = {
.invalidatepage = ext4_invalidatepage,
.releasepage = ext4_releasepage,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations ext4_da_aops = {
@@ -3431,6 +3434,7 @@ static const struct address_space_operations ext4_da_aops = {
.direct_IO = ext4_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
void ext4_set_aops(struct inode *inode)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8970d8c49bb..04629d1302f 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -470,19 +470,11 @@ static void fat_put_super(struct super_block *sb)
iput(sbi->fat_inode);
- if (sbi->nls_disk) {
- unload_nls(sbi->nls_disk);
- sbi->nls_disk = NULL;
- sbi->options.codepage = fat_default_codepage;
- }
- if (sbi->nls_io) {
- unload_nls(sbi->nls_io);
- sbi->nls_io = NULL;
- }
- if (sbi->options.iocharset != fat_default_iocharset) {
+ unload_nls(sbi->nls_disk);
+ unload_nls(sbi->nls_io);
+
+ if (sbi->options.iocharset != fat_default_iocharset)
kfree(sbi->options.iocharset);
- sbi->options.iocharset = fat_default_iocharset;
- }
sb->s_fs_info = NULL;
kfree(sbi);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ae413086db9..fc089f2f7f5 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp)
return pid;
}
+/*
+ * f_setown_ex - F_SETOWN_EX backend: set the owner that receives SIGIO/SIGURG
+ * @filp: file whose owner is being changed
+ * @arg: user-space address of a struct f_owner_ex describing the new owner
+ *
+ * Translates owner.type into a pid type (F_OWNER_TID -> PIDTYPE_MAX,
+ * F_OWNER_PID -> PIDTYPE_PID, F_OWNER_GID -> PIDTYPE_PGID) and passes the
+ * looked-up struct pid to __f_setown().  send_sigio()/send_sigurg() treat
+ * PIDTYPE_MAX as "deliver to this one task only" rather than the group.
+ *
+ * Returns -EFAULT if @arg cannot be read, -EINVAL for an unknown owner type,
+ * -ESRCH if a non-zero owner.pid does not resolve, otherwise whatever
+ * __f_setown() returns.
+ */
+static int f_setown_ex(struct file *filp, unsigned long arg)
+{
+	struct f_owner_ex __user *owner_p = (void __user *)arg;
+	struct f_owner_ex owner;
+	struct pid *pid;
+	int type;
+	int ret;
+
+	/*
+	 * copy_from_user() returns the number of bytes it could NOT copy,
+	 * not an errno; handing that count back would look like success
+	 * with a positive value to the fcntl() caller.
+	 */
+	if (copy_from_user(&owner, owner_p, sizeof(owner)))
+		return -EFAULT;
+
+	switch (owner.type) {
+	case F_OWNER_TID:
+		type = PIDTYPE_MAX;
+		break;
+
+	case F_OWNER_PID:
+		type = PIDTYPE_PID;
+		break;
+
+	case F_OWNER_GID:
+		type = PIDTYPE_PGID;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/* The struct pid is looked up and consumed inside the RCU section. */
+	rcu_read_lock();
+	pid = find_vpid(owner.pid);
+	if (owner.pid && !pid)
+		ret = -ESRCH;
+	else
+		ret = __f_setown(filp, pid, type, 1);
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/*
+ * f_getown_ex - F_GETOWN_EX backend: report the current SIGIO/SIGURG owner
+ * @filp: file whose owner is queried
+ * @arg: user-space address of a struct f_owner_ex to fill in
+ *
+ * Snapshots the owner pid and pid type under f_owner.lock, maps the pid type
+ * back to F_OWNER_TID/F_OWNER_PID/F_OWNER_GID, then copies the result out
+ * after the lock has been dropped.
+ *
+ * Returns 0 on success, -EINVAL (with a WARN) if the stored pid type is not
+ * one of the three known values, or -EFAULT if @arg cannot be written.
+ */
+static int f_getown_ex(struct file *filp, unsigned long arg)
+{
+	struct f_owner_ex __user *owner_p = (void __user *)arg;
+	struct f_owner_ex owner;
+	int ret = 0;
+
+	read_lock(&filp->f_owner.lock);
+	owner.pid = pid_vnr(filp->f_owner.pid);
+	switch (filp->f_owner.pid_type) {
+	case PIDTYPE_MAX:
+		owner.type = F_OWNER_TID;
+		break;
+
+	case PIDTYPE_PID:
+		owner.type = F_OWNER_PID;
+		break;
+
+	case PIDTYPE_PGID:
+		owner.type = F_OWNER_GID;
+		break;
+
+	default:
+		WARN_ON(1);
+		ret = -EINVAL;
+		break;
+	}
+	read_unlock(&filp->f_owner.lock);
+
+	/* owner.type is unset on the error path, so never copy it out. */
+	if (ret)
+		return ret;
+
+	/*
+	 * copy_to_user() returns the number of bytes left uncopied, not an
+	 * errno; translate any shortfall into -EFAULT.
+	 */
+	if (copy_to_user(owner_p, &owner, sizeof(owner)))
+		return -EFAULT;
+
+	return 0;
+}
+
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
@@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_SETOWN:
err = f_setown(filp, arg, 1);
break;
+ case F_GETOWN_EX:
+ err = f_getown_ex(filp, arg);
+ break;
+ case F_SETOWN_EX:
+ err = f_setown_ex(filp, arg);
+ break;
case F_GETSIG:
err = filp->f_owner.signum;
break;
@@ -428,8 +507,7 @@ static inline int sigio_perm(struct task_struct *p,
static void send_sigio_to_task(struct task_struct *p,
struct fown_struct *fown,
- int fd,
- int reason)
+ int fd, int reason, int group)
{
/*
* F_SETSIG can change ->signum lockless in parallel, make
@@ -461,11 +539,11 @@ static void send_sigio_to_task(struct task_struct *p,
else
si.si_band = band_table[reason - POLL_IN];
si.si_fd = fd;
- if (!group_send_sig_info(signum, &si, p))
+ if (!do_send_sig_info(signum, &si, p, group))
break;
/* fall-through: fall back on the old plain SIGIO signal */
case 0:
- group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
+ do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
}
}
@@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown, int fd, int band)
struct task_struct *p;
enum pid_type type;
struct pid *pid;
+ int group = 1;
read_lock(&fown->lock);
+
type = fown->pid_type;
+ if (type == PIDTYPE_MAX) {
+ group = 0;
+ type = PIDTYPE_PID;
+ }
+
pid = fown->pid;
if (!pid)
goto out_unlock_fown;
read_lock(&tasklist_lock);
do_each_pid_task(pid, type, p) {
- send_sigio_to_task(p, fown, fd, band);
+ send_sigio_to_task(p, fown, fd, band, group);
} while_each_pid_task(pid, type, p);
read_unlock(&tasklist_lock);
out_unlock_fown:
@@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown, int fd, int band)
}
static void send_sigurg_to_task(struct task_struct *p,
- struct fown_struct *fown)
+ struct fown_struct *fown, int group)
{
if (sigio_perm(p, fown, SIGURG))
- group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
+ do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
}
int send_sigurg(struct fown_struct *fown)
@@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown)
struct task_struct *p;
enum pid_type type;
struct pid *pid;
+ int group = 1;
int ret = 0;
read_lock(&fown->lock);
+
type = fown->pid_type;
+ if (type == PIDTYPE_MAX) {
+ group = 0;
+ type = PIDTYPE_PID;
+ }
+
pid = fown->pid;
if (!pid)
goto out_unlock_fown;
@@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown)
read_lock(&tasklist_lock);
do_each_pid_task(pid, type, p) {
- send_sigurg_to_task(p, fown);
+ send_sigurg_to_task(p, fown, group);
} while_each_pid_task(pid, type, p);
read_unlock(&tasklist_lock);
out_unlock_fown:
diff --git a/fs/file_table.c b/fs/file_table.c
index 334ce39881f..8eb44042e00 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(get_max_files);
* Handle nr_files sysctl
*/
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
-int proc_nr_files(ctl_table *table, int write, struct file *filp,
+int proc_nr_files(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
files_stat.nr_files = get_nr_files();
- return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ return proc_dointvec(table, write, buffer, lenp, ppos);
}
#else
-int proc_nr_files(ctl_table *table, int write, struct file *filp,
+int proc_nr_files(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e703654e7f4..992f6c9410b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1276,14 +1276,9 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
return 0;
if (attr->ia_valid & ATTR_SIZE) {
- unsigned long limit;
- if (IS_SWAPFILE(inode))
- return -ETXTBSY;
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
- send_sig(SIGXFSZ, current, 0);
- return -EFBIG;
- }
+ err = inode_newsize_ok(inode, attr->ia_size);
+ if (err)
+ return err;
is_truncate = true;
}
@@ -1350,8 +1345,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
* FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
*/
if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
- if (outarg.attr.size < oldsize)
- fuse_truncate(inode->i_mapping, outarg.attr.size);
+ truncate_pagecache(inode, oldsize, outarg.attr.size);
invalidate_inode_pages2(inode->i_mapping);
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fc9c79feb5f..01cc462ff45 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -606,8 +606,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
u64 attr_valid);
-void fuse_truncate(struct address_space *mapping, loff_t offset);
-
/**
* Initialize the client device
*/
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6da947daabd..1a822ce2b24 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -140,14 +140,6 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
return 0;
}
-void fuse_truncate(struct address_space *mapping, loff_t offset)
-{
- /* See vmtruncate() */
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(mapping, offset);
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-}
-
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
u64 attr_valid)
{
@@ -205,8 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
spin_unlock(&fc->lock);
if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
- if (attr->size < oldsize)
- fuse_truncate(inode->i_mapping, attr->size);
+ truncate_pagecache(inode, oldsize, attr->size);
invalidate_inode_pages2(inode->i_mapping);
}
}
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 7ebae9a4ecc..694b5d48f03 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1135,6 +1135,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
.direct_IO = gfs2_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations gfs2_ordered_aops = {
@@ -1151,6 +1152,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
.direct_IO = gfs2_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations gfs2_jdata_aops = {
@@ -1166,6 +1168,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
.invalidatepage = gfs2_invalidatepage,
.releasepage = gfs2_releasepage,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
void gfs2_set_aops(struct inode *inode)
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index c3ac1805405..247436c10de 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -12,7 +12,6 @@
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/namei.h>
-#include <linux/utsname.h>
#include <linux/mm.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 7b6165f25fb..8bbe03c3f6d 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -344,10 +344,8 @@ void hfs_mdb_put(struct super_block *sb)
brelse(HFS_SB(sb)->mdb_bh);
brelse(HFS_SB(sb)->alt_mdb_bh);
- if (HFS_SB(sb)->nls_io)
- unload_nls(HFS_SB(sb)->nls_io);
- if (HFS_SB(sb)->nls_disk)
- unload_nls(HFS_SB(sb)->nls_disk);
+ unload_nls(HFS_SB(sb)->nls_io);
+ unload_nls(HFS_SB(sb)->nls_disk);
free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
kfree(HFS_SB(sb));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index c0759fe0855..43022f3d514 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -229,8 +229,7 @@ static void hfsplus_put_super(struct super_block *sb)
iput(HFSPLUS_SB(sb).alloc_file);
iput(HFSPLUS_SB(sb).hidden_dir);
brelse(HFSPLUS_SB(sb).s_vhbh);
- if (HFSPLUS_SB(sb).nls)
- unload_nls(HFSPLUS_SB(sb).nls);
+ unload_nls(HFSPLUS_SB(sb).nls);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
@@ -464,8 +463,7 @@ out:
cleanup:
hfsplus_put_super(sb);
- if (nls)
- unload_nls(nls);
+ unload_nls(nls);
return err;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index eba6d552d9c..87a1258953b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -380,36 +380,11 @@ static void hugetlbfs_delete_inode(struct inode *inode)
static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
{
- struct super_block *sb = inode->i_sb;
-
- if (!hlist_unhashed(&inode->i_hash)) {
- if (!(inode->i_state & (I_DIRTY|I_SYNC)))
- list_move(&inode->i_list, &inode_unused);
- inodes_stat.nr_unused++;
- if (!sb || (sb->s_flags & MS_ACTIVE)) {
- spin_unlock(&inode_lock);
- return;
- }
- inode->i_state |= I_WILL_FREE;
- spin_unlock(&inode_lock);
- /*
- * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
- * in our backing_dev_info.
- */
- write_inode_now(inode, 1);
- spin_lock(&inode_lock);
- inode->i_state &= ~I_WILL_FREE;
- inodes_stat.nr_unused--;
- hlist_del_init(&inode->i_hash);
+ if (generic_detach_inode(inode)) {
+ truncate_hugepages(inode, 0);
+ clear_inode(inode);
+ destroy_inode(inode);
}
- list_del_init(&inode->i_list);
- list_del_init(&inode->i_sb_list);
- inode->i_state |= I_FREEING;
- inodes_stat.nr_inodes--;
- spin_unlock(&inode_lock);
- truncate_hugepages(inode, 0);
- clear_inode(inode);
- destroy_inode(inode);
}
static void hugetlbfs_drop_inode(struct inode *inode)
@@ -936,15 +911,9 @@ static struct file_system_type hugetlbfs_fs_type = {
static struct vfsmount *hugetlbfs_vfsmount;
-static int can_do_hugetlb_shm(int creat_flags)
+static int can_do_hugetlb_shm(void)
{
- if (creat_flags != HUGETLB_SHMFS_INODE)
- return 0;
- if (capable(CAP_IPC_LOCK))
- return 1;
- if (in_group_p(sysctl_hugetlb_shm_group))
- return 1;
- return 0;
+ return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
}
struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
@@ -960,7 +929,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
if (!hugetlbfs_vfsmount)
return ERR_PTR(-ENOENT);
- if (!can_do_hugetlb_shm(creat_flags)) {
+ if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
*user = current_user();
if (user_shm_lock(size, *user)) {
WARN_ONCE(1,
diff --git a/fs/inode.c b/fs/inode.c
index 76582b06ab9..4d8e3be5597 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1241,7 +1241,16 @@ void generic_delete_inode(struct inode *inode)
}
EXPORT_SYMBOL(generic_delete_inode);
-static void generic_forget_inode(struct inode *inode)
+/**
+ * generic_detach_inode - remove inode from inode lists
+ * @inode: inode to remove
+ *
+ * Remove inode from inode lists, write it if it's dirty. This is just an
+ * internal VFS helper exported for hugetlbfs. Do not use!
+ *
+ * Returns 1 if inode should be completely destroyed.
+ */
+int generic_detach_inode(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
@@ -1251,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode)
inodes_stat.nr_unused++;
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode_lock);
- return;
+ return 0;
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
@@ -1269,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode)
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(generic_detach_inode);
+
+static void generic_forget_inode(struct inode *inode)
+{
+ if (!generic_detach_inode(inode))
+ return;
if (inode->i_data.nrpages)
truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
@@ -1399,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
struct inode *inode = dentry->d_inode;
struct timespec now;
- if (mnt_want_write(mnt))
- return;
if (inode->i_flags & S_NOATIME)
- goto out;
+ return;
if (IS_NOATIME(inode))
- goto out;
+ return;
if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
- goto out;
+ return;
if (mnt->mnt_flags & MNT_NOATIME)
- goto out;
+ return;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
- goto out;
+ return;
now = current_fs_time(inode->i_sb);
if (!relatime_need_update(mnt, inode, now))
- goto out;
+ return;
if (timespec_equal(&inode->i_atime, &now))
- goto out;
+ return;
+
+ if (mnt_want_write(mnt))
+ return;
inode->i_atime = now;
mark_inode_dirty_sync(inode);
-out:
mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);
@@ -1444,34 +1461,37 @@ void file_update_time(struct file *file)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct timespec now;
- int sync_it = 0;
- int err;
+ enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
+ /* First try to exhaust all avenues to not sync */
if (IS_NOCMTIME(inode))
return;
- err = mnt_want_write_file(file);
- if (err)
- return;
-
now = current_fs_time(inode->i_sb);
- if (!timespec_equal(&inode->i_mtime, &now)) {
- inode->i_mtime = now;
- sync_it = 1;
- }
+ if (!timespec_equal(&inode->i_mtime, &now))
+ sync_it = S_MTIME;
- if (!timespec_equal(&inode->i_ctime, &now)) {
- inode->i_ctime = now;
- sync_it = 1;
- }
+ if (!timespec_equal(&inode->i_ctime, &now))
+ sync_it |= S_CTIME;
- if (IS_I_VERSION(inode)) {
- inode_inc_iversion(inode);
- sync_it = 1;
- }
+ if (IS_I_VERSION(inode))
+ sync_it |= S_VERSION;
+
+ if (!sync_it)
+ return;
- if (sync_it)
- mark_inode_dirty_sync(inode);
+ /* Finally allowed to write? Takes lock. */
+ if (mnt_want_write_file(file))
+ return;
+
+ /* Only change inode inside the lock region */
+ if (sync_it & S_VERSION)
+ inode_inc_iversion(inode);
+ if (sync_it & S_CTIME)
+ inode->i_ctime = now;
+ if (sync_it & S_MTIME)
+ inode->i_mtime = now;
+ mark_inode_dirty_sync(inode);
mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(file_update_time);
@@ -1599,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
else if (S_ISSOCK(mode))
inode->i_fop = &bad_sock_fops;
else
- printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
- mode);
+ printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
+ " inode %s:%lu\n", mode, inode->i_sb->s_id,
+ inode->i_ino);
}
EXPORT_SYMBOL(init_special_inode);
diff --git a/fs/internal.h b/fs/internal.h
index d55ef562f0b..515175b8b72 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -57,6 +57,7 @@ extern int check_unsafe_exec(struct linux_binprm *);
* namespace.c
*/
extern int copy_mount_options(const void __user *, unsigned long *);
+extern int copy_mount_string(const void __user *, char **);
extern void free_vfsmnt(struct vfsmount *);
extern struct vfsmount *alloc_vfsmnt(const char *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5612880fcbe..7b17a14396f 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -162,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags);
static int fiemap_check_ranges(struct super_block *sb,
u64 start, u64 len, u64 *new_len)
{
+ u64 maxbytes = (u64) sb->s_maxbytes;
+
*new_len = len;
if (len == 0)
return -EINVAL;
- if (start > sb->s_maxbytes)
+ if (start > maxbytes)
return -EFBIG;
/*
* Shrink request scope to what the fs can actually handle.
*/
- if ((len > sb->s_maxbytes) ||
- (sb->s_maxbytes - len) < start)
- *new_len = sb->s_maxbytes - start;
+ if (len > maxbytes || (maxbytes - len) < start)
+ *new_len = maxbytes - start;
return 0;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 85f96bc651c..6b4dcd4f294 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -46,10 +46,7 @@ static void isofs_put_super(struct super_block *sb)
#ifdef CONFIG_JOLIET
lock_kernel();
- if (sbi->s_nls_iocharset) {
- unload_nls(sbi->s_nls_iocharset);
- sbi->s_nls_iocharset = NULL;
- }
+ unload_nls(sbi->s_nls_iocharset);
unlock_kernel();
#endif
@@ -912,8 +909,7 @@ out_no_root:
printk(KERN_WARNING "%s: get root inode failed\n", __func__);
out_no_inode:
#ifdef CONFIG_JOLIET
- if (sbi->s_nls_iocharset)
- unload_nls(sbi->s_nls_iocharset);
+ unload_nls(sbi->s_nls_iocharset);
#endif
goto out_freesbi;
out_no_read:
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 37e6dcda8fc..2234c73fc57 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -178,13 +178,11 @@ static void jfs_put_super(struct super_block *sb)
rc = jfs_umount(sb);
if (rc)
jfs_err("jfs_umount failed with return code %d", rc);
- if (sbi->nls_tab)
- unload_nls(sbi->nls_tab);
- sbi->nls_tab = NULL;
+
+ unload_nls(sbi->nls_tab);
truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
iput(sbi->direct_inode);
- sbi->direct_inode = NULL;
kfree(sbi);
@@ -347,8 +345,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
if (nls_map != (void *) -1) {
/* Discard old (if remount) */
- if (sbi->nls_tab)
- unload_nls(sbi->nls_tab);
+ unload_nls(sbi->nls_tab);
sbi->nls_tab = nls_map;
}
return 1;
diff --git a/fs/libfs.c b/fs/libfs.c
index dcec3d3ea64..219576c52d8 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -527,14 +527,18 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
const void *from, size_t available)
{
loff_t pos = *ppos;
+ size_t ret;
+
if (pos < 0)
return -EINVAL;
- if (pos >= available)
+ if (pos >= available || !count)
return 0;
if (count > available - pos)
count = available - pos;
- if (copy_to_user(to, from + pos, count))
+ ret = copy_to_user(to, from + pos, count);
+ if (ret == count)
return -EFAULT;
+ count -= ret;
*ppos = pos + count;
return count;
}
@@ -735,10 +739,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
if (copy_from_user(attr->set_buf, buf, size))
goto out;
- ret = len; /* claim we got the whole input */
attr->set_buf[size] = '\0';
val = simple_strtol(attr->set_buf, NULL, 0);
- attr->set(attr->data, val);
+ ret = attr->set(attr->data, val);
+ if (ret == 0)
+ ret = len; /* on success, claim we got the whole input */
out:
mutex_unlock(&attr->mutex);
return ret;
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 0336f2beacd..b583ab0a4cb 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -8,7 +8,6 @@
#include <linux/types.h>
#include <linux/sched.h>
-#include <linux/utsname.h>
#include <linux/nfs.h>
#include <linux/sunrpc/xdr.h>
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index e1d52865319..ad9dbbc9145 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -9,7 +9,6 @@
#include <linux/types.h>
#include <linux/sched.h>
-#include <linux/utsname.h>
#include <linux/nfs.h>
#include <linux/sunrpc/xdr.h>
diff --git a/fs/namespace.c b/fs/namespace.c
index 7230787d18b..bdc3cb4fd22 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1640,7 +1640,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
{
struct vfsmount *mnt;
- if (!type || !memchr(type, 0, PAGE_SIZE))
+ if (!type)
return -EINVAL;
/* we need capabilities... */
@@ -1871,6 +1871,23 @@ int copy_mount_options(const void __user * data, unsigned long *where)
return 0;
}
+/*
+ * Duplicate an optional user-space string for the mount path.
+ *
+ * A NULL @data is not an error: *@where is set to NULL and 0 is returned.
+ * Otherwise the string is duplicated with strndup_user(), using PAGE_SIZE as
+ * the length bound, and the copy is stored in *@where.  On failure the errno
+ * encoded in the returned pointer is propagated and *@where is left alone.
+ * The mount syscall releases the copies it obtains here with kfree().
+ */
+int copy_mount_string(const void __user *data, char **where)
+{
+	char *copy = NULL;
+
+	if (data) {
+		copy = strndup_user(data, PAGE_SIZE);
+		if (IS_ERR(copy))
+			return PTR_ERR(copy);
+	}
+
+	*where = copy;
+	return 0;
+}
+
/*
* Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
* be given to the mount() call (ie: read-only, no-dev, no-suid etc).
@@ -1900,8 +1917,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
return -EINVAL;
- if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
- return -EINVAL;
if (data_page)
((char *)data_page)[PAGE_SIZE - 1] = 0;
@@ -2070,40 +2085,42 @@ EXPORT_SYMBOL(create_mnt_ns);
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
char __user *, type, unsigned long, flags, void __user *, data)
{
- int retval;
+ int ret;
+ char *kernel_type;
+ char *kernel_dir;
+ char *kernel_dev;
unsigned long data_page;
- unsigned long type_page;
- unsigned long dev_page;
- char *dir_page;
- retval = copy_mount_options(type, &type_page);
- if (retval < 0)
- return retval;
+ ret = copy_mount_string(type, &kernel_type);
+ if (ret < 0)
+ goto out_type;
- dir_page = getname(dir_name);
- retval = PTR_ERR(dir_page);
- if (IS_ERR(dir_page))
- goto out1;
+ kernel_dir = getname(dir_name);
+ if (IS_ERR(kernel_dir)) {
+ ret = PTR_ERR(kernel_dir);
+ goto out_dir;
+ }
- retval = copy_mount_options(dev_name, &dev_page);
- if (retval < 0)
- goto out2;
+ ret = copy_mount_string(dev_name, &kernel_dev);
+ if (ret < 0)
+ goto out_dev;
- retval = copy_mount_options(data, &data_page);
- if (retval < 0)
- goto out3;
+ ret = copy_mount_options(data, &data_page);
+ if (ret < 0)
+ goto out_data;
- retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
- flags, (void *)data_page);
- free_page(data_page);
+ ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
+ (void *) data_page);
-out3:
- free_page(dev_page);
-out2:
- putname(dir_page);
-out1:
- free_page(type_page);
- return retval;
+ free_page(data_page);
+out_data:
+ kfree(kernel_dev);
+out_dev:
+ putname(kernel_dir);
+out_dir:
+ kfree(kernel_type);
+out_type:
+ return ret;
}
/*
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index b99ce205b1b..cf98da1be23 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -746,16 +746,8 @@ static void ncp_put_super(struct super_block *sb)
#ifdef CONFIG_NCPFS_NLS
/* unload the NLS charsets */
- if (server->nls_vol)
- {
- unload_nls(server->nls_vol);
- server->nls_vol = NULL;
- }
- if (server->nls_io)
- {
- unload_nls(server->nls_io);
- server->nls_io = NULL;
- }
+ unload_nls(server->nls_vol);
+ unload_nls(server->nls_io);
#endif /* CONFIG_NCPFS_NLS */
if (server->info_filp)
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 53a7ed7eb9c..0d58caf4a6e 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -223,10 +223,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
oldset_io = server->nls_io;
server->nls_io = iocharset;
- if (oldset_cp)
- unload_nls(oldset_cp);
- if (oldset_io)
- unload_nls(oldset_io);
+ unload_nls(oldset_cp);
+ unload_nls(oldset_io);
return 0;
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 152025358da..63976c0ccc2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -648,8 +648,6 @@ static int nfs_start_lockd(struct nfs_server *server)
.hostname = clp->cl_hostname,
.address = (struct sockaddr *)&clp->cl_addr,
.addrlen = clp->cl_addrlen,
- .protocol = server->flags & NFS_MOUNT_TCP ?
- IPPROTO_TCP : IPPROTO_UDP,
.nfs_version = clp->rpc_ops->version,
.noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
1 : 0,
@@ -660,6 +658,14 @@ static int nfs_start_lockd(struct nfs_server *server)
if (server->flags & NFS_MOUNT_NONLM)
return 0;
+ switch (clp->cl_proto) {
+ default:
+ nlm_init.protocol = IPPROTO_TCP;
+ break;
+ case XPRT_TRANSPORT_UDP:
+ nlm_init.protocol = IPPROTO_UDP;
+ }
+
host = nlmclnt_init(&nlm_init);
if (IS_ERR(host))
return PTR_ERR(host);
@@ -787,7 +793,7 @@ static int nfs_init_server(struct nfs_server *server,
dprintk("--> nfs_init_server()\n");
#ifdef CONFIG_NFS_V3
- if (data->flags & NFS_MOUNT_VER3)
+ if (data->version == 3)
cl_init.rpc_ops = &nfs_v3_clientops;
#endif
@@ -964,6 +970,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
target->acdirmin = source->acdirmin;
target->acdirmax = source->acdirmax;
target->caps = source->caps;
+ target->options = source->options;
}
/*
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5021b75d2d1..86d6b4db109 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -525,6 +525,7 @@ const struct address_space_operations nfs_file_aops = {
.direct_IO = nfs_direct_IO,
.migratepage = nfs_migrate_page,
.launder_page = nfs_launder_page,
+ .error_remove_page = generic_error_remove_page,
};
/*
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 379be678cb7..70fad69eb95 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -58,17 +58,34 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)
/*
* Get the cache cookie for an NFS superblock. We have to handle
* uniquification here because the cache doesn't do it for us.
+ *
+ * The default uniquifier is just an empty string, but it may be overridden
+ * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
+ * superblock across an automount point of some nature.
*/
-void nfs_fscache_get_super_cookie(struct super_block *sb,
- struct nfs_parsed_mount_data *data)
+void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq,
+ struct nfs_clone_mount *mntdata)
{
struct nfs_fscache_key *key, *xkey;
struct nfs_server *nfss = NFS_SB(sb);
struct rb_node **p, *parent;
- const char *uniq = data->fscache_uniq ?: "";
int diff, ulen;
- ulen = strlen(uniq);
+ if (uniq) {
+ ulen = strlen(uniq);
+ } else if (mntdata) {
+ struct nfs_server *mnt_s = NFS_SB(mntdata->sb);
+ if (mnt_s->fscache_key) {
+ uniq = mnt_s->fscache_key->key.uniquifier;
+ ulen = mnt_s->fscache_key->key.uniq_len;
+ }
+ }
+
+ if (!uniq) {
+ uniq = "";
+ ulen = 1;
+ }
+
key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL);
if (!key)
return;
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index 6e809bb0ff0..b9c572d0679 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -74,7 +74,8 @@ extern void nfs_fscache_get_client_cookie(struct nfs_client *);
extern void nfs_fscache_release_client_cookie(struct nfs_client *);
extern void nfs_fscache_get_super_cookie(struct super_block *,
- struct nfs_parsed_mount_data *);
+ const char *,
+ struct nfs_clone_mount *);
extern void nfs_fscache_release_super_cookie(struct super_block *);
extern void nfs_fscache_init_inode_cookie(struct inode *);
@@ -173,7 +174,8 @@ static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
static inline void nfs_fscache_get_super_cookie(
struct super_block *sb,
- struct nfs_parsed_mount_data *data)
+ const char *uniq,
+ struct nfs_clone_mount *mntdata)
{
}
static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 060022b4651..faa091865ad 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -458,49 +458,21 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
*/
static int nfs_vmtruncate(struct inode * inode, loff_t offset)
{
- if (i_size_read(inode) < offset) {
- unsigned long limit;
-
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && offset > limit)
- goto out_sig;
- if (offset > inode->i_sb->s_maxbytes)
- goto out_big;
- spin_lock(&inode->i_lock);
- i_size_write(inode, offset);
- spin_unlock(&inode->i_lock);
- } else {
- struct address_space *mapping = inode->i_mapping;
+ loff_t oldsize;
+ int err;
- /*
- * truncation of in-use swapfiles is disallowed - it would
- * cause subsequent swapout to scribble on the now-freed
- * blocks.
- */
- if (IS_SWAPFILE(inode))
- return -ETXTBSY;
- spin_lock(&inode->i_lock);
- i_size_write(inode, offset);
- spin_unlock(&inode->i_lock);
+ err = inode_newsize_ok(inode, offset);
+ if (err)
+ goto out;
- /*
- * unmap_mapping_range is called twice, first simply for
- * efficiency so that truncate_inode_pages does fewer
- * single-page unmaps. However after this first call, and
- * before truncate_inode_pages finishes, it is possible for
- * private pages to be COWed, which remain after
- * truncate_inode_pages finishes, hence the second
- * unmap_mapping_range call must be made for correctness.
- */
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(mapping, offset);
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- }
- return 0;
-out_sig:
- send_sig(SIGXFSZ, current, 0);
-out_big:
- return -EFBIG;
+ spin_lock(&inode->i_lock);
+ oldsize = inode->i_size;
+ i_size_write(inode, offset);
+ spin_unlock(&inode->i_lock);
+
+ truncate_pagecache(inode, oldsize, offset);
+out:
+ return err;
}
/**
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index c862c9340f9..5e078b222b4 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -13,7 +13,6 @@
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/in.h>
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ee6a13f0544..3f8881d1a05 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -7,7 +7,6 @@
*/
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/sunrpc/clnt.h>
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 35869a4921f..5fe5492fbd2 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -10,7 +10,6 @@
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/in.h>
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index be6544aef41..ed7c269e251 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -36,7 +36,6 @@
*/
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/string.h>
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index cfc30d362f9..83ad47cbdd8 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -39,7 +39,6 @@
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/in.h>
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 7be72d90d49..ef583854d8d 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -32,7 +32,6 @@
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/in.h>
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f1cc0587cfe..810770f9681 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -728,6 +728,27 @@ static void nfs_umount_begin(struct super_block *sb)
unlock_kernel();
}
+static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(int flags)
+{
+ struct nfs_parsed_mount_data *data;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (data) {
+ data->flags = flags;
+ data->rsize = NFS_MAX_FILE_IO_SIZE;
+ data->wsize = NFS_MAX_FILE_IO_SIZE;
+ data->acregmin = NFS_DEF_ACREGMIN;
+ data->acregmax = NFS_DEF_ACREGMAX;
+ data->acdirmin = NFS_DEF_ACDIRMIN;
+ data->acdirmax = NFS_DEF_ACDIRMAX;
+ data->nfs_server.port = NFS_UNSPEC_PORT;
+ data->auth_flavors[0] = RPC_AUTH_UNIX;
+ data->auth_flavor_len = 1;
+ data->minorversion = 0;
+ }
+ return data;
+}
+
/*
* Sanity-check a server address provided by the mount command.
*
@@ -1430,10 +1451,13 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
int status;
if (args->mount_server.version == 0) {
- if (args->flags & NFS_MOUNT_VER3)
- args->mount_server.version = NFS_MNT3_VERSION;
- else
- args->mount_server.version = NFS_MNT_VERSION;
+ switch (args->version) {
+ default:
+ args->mount_server.version = NFS_MNT3_VERSION;
+ break;
+ case 2:
+ args->mount_server.version = NFS_MNT_VERSION;
+ }
}
request.version = args->mount_server.version;
@@ -1634,20 +1658,6 @@ static int nfs_validate_mount_data(void *options,
if (data == NULL)
goto out_no_data;
- args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
- args->rsize = NFS_MAX_FILE_IO_SIZE;
- args->wsize = NFS_MAX_FILE_IO_SIZE;
- args->acregmin = NFS_DEF_ACREGMIN;
- args->acregmax = NFS_DEF_ACREGMAX;
- args->acdirmin = NFS_DEF_ACDIRMIN;
- args->acdirmax = NFS_DEF_ACDIRMAX;
- args->mount_server.port = NFS_UNSPEC_PORT;
- args->nfs_server.port = NFS_UNSPEC_PORT;
- args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- args->auth_flavors[0] = RPC_AUTH_UNIX;
- args->auth_flavor_len = 1;
- args->minorversion = 0;
-
switch (data->version) {
case 1:
data->namlen = 0;
@@ -1778,7 +1788,7 @@ static int nfs_validate_mount_data(void *options,
}
#ifndef CONFIG_NFS_V3
- if (args->flags & NFS_MOUNT_VER3)
+ if (args->version == 3)
goto out_v3_not_compiled;
#endif /* !CONFIG_NFS_V3 */
@@ -1936,7 +1946,7 @@ static void nfs_fill_super(struct super_block *sb,
if (data->bsize)
sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
- if (server->flags & NFS_MOUNT_VER3) {
+ if (server->nfs_client->rpc_ops->version == 3) {
/* The VFS shouldn't apply the umask to mode bits. We will do
* so ourselves when necessary.
*/
@@ -1960,7 +1970,7 @@ static void nfs_clone_super(struct super_block *sb,
sb->s_blocksize = old_sb->s_blocksize;
sb->s_maxbytes = old_sb->s_maxbytes;
- if (server->flags & NFS_MOUNT_VER3) {
+ if (server->nfs_client->rpc_ops->version == 3) {
/* The VFS shouldn't apply the umask to mode bits. We will do
* so ourselves when necessary.
*/
@@ -2094,7 +2104,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
};
int error = -ENOMEM;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = nfs_alloc_parsed_mount_data(NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
if (data == NULL || mntfh == NULL)
goto out_free_fh;
@@ -2144,7 +2154,8 @@ static int nfs_get_sb(struct file_system_type *fs_type,
if (!s->s_root) {
/* initial superblock/root creation */
nfs_fill_super(s, data);
- nfs_fscache_get_super_cookie(s, data);
+ nfs_fscache_get_super_cookie(
+ s, data ? data->fscache_uniq : NULL, NULL);
}
mntroot = nfs_get_root(s, mntfh);
@@ -2245,6 +2256,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
if (!s->s_root) {
/* initial superblock/root creation */
nfs_clone_super(s, data->sb);
+ nfs_fscache_get_super_cookie(s, NULL, data);
}
mntroot = nfs_get_root(s, data->fh);
@@ -2362,18 +2374,7 @@ static int nfs4_validate_mount_data(void *options,
if (data == NULL)
goto out_no_data;
- args->rsize = NFS_MAX_FILE_IO_SIZE;
- args->wsize = NFS_MAX_FILE_IO_SIZE;
- args->acregmin = NFS_DEF_ACREGMIN;
- args->acregmax = NFS_DEF_ACREGMAX;
- args->acdirmin = NFS_DEF_ACDIRMIN;
- args->acdirmax = NFS_DEF_ACDIRMAX;
- args->nfs_server.port = NFS_UNSPEC_PORT;
- args->auth_flavors[0] = RPC_AUTH_UNIX;
- args->auth_flavor_len = 1;
args->version = 4;
- args->minorversion = 0;
-
switch (data->version) {
case 1:
if (data->host_addrlen > sizeof(args->nfs_server.address))
@@ -2508,7 +2509,8 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
if (!s->s_root) {
/* initial superblock/root creation */
nfs4_fill_super(s);
- nfs_fscache_get_super_cookie(s, data);
+ nfs_fscache_get_super_cookie(
+ s, data ? data->fscache_uniq : NULL, NULL);
}
mntroot = nfs4_get_root(s, mntfh);
@@ -2656,7 +2658,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
struct nfs_parsed_mount_data *data;
int error = -ENOMEM;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = nfs_alloc_parsed_mount_data(0);
if (data == NULL)
goto out_free_data;
@@ -2741,6 +2743,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
if (!s->s_root) {
/* initial superblock/root creation */
nfs4_clone_super(s, data->sb);
+ nfs_fscache_get_super_cookie(s, NULL, data);
}
mntroot = nfs4_get_root(s, data->fh);
@@ -2822,6 +2825,7 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
if (!s->s_root) {
/* initial superblock/root creation */
nfs4_fill_super(s);
+ nfs_fscache_get_super_cookie(s, NULL, data);
}
mntroot = nfs4_get_root(s, &mntfh);
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index cdfa86fa147..ba2c199592f 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -38,7 +38,6 @@
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/sunrpc/clnt.h>
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 477d37d83b3..2224b4d07bf 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -270,7 +270,8 @@ struct nls_table *load_nls(char *charset)
void unload_nls(struct nls_table *nls)
{
- module_put(nls->owner);
+ if (nls)
+ module_put(nls->owner);
}
static const wchar_t charset2uni[256] = {
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index b38f944f066..cfce53cb65d 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1550,6 +1550,7 @@ const struct address_space_operations ntfs_aops = {
.migratepage = buffer_migrate_page, /* Move a page cache page from
one physical page to an
other. */
+ .error_remove_page = generic_error_remove_page,
};
/**
@@ -1569,6 +1570,7 @@ const struct address_space_operations ntfs_mst_aops = {
.migratepage = buffer_migrate_page, /* Move a page cache page from
one physical page to an
other. */
+ .error_remove_page = generic_error_remove_page,
};
#ifdef NTFS_RW
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index abaaa1cbf8d..80b04770e8e 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -201,8 +201,7 @@ use_utf8:
v, old_nls->charset);
nls_map = old_nls;
} else /* nls_map */ {
- if (old_nls)
- unload_nls(old_nls);
+ unload_nls(old_nls);
}
} else if (!strcmp(p, "utf8")) {
bool val = false;
@@ -2427,10 +2426,9 @@ static void ntfs_put_super(struct super_block *sb)
ntfs_free(vol->upcase);
vol->upcase = NULL;
}
- if (vol->nls_map) {
- unload_nls(vol->nls_map);
- vol->nls_map = NULL;
- }
+
+ unload_nls(vol->nls_map);
+
sb->s_fs_info = NULL;
kfree(vol);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 72e76062a90..deb2b132ae5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2022,4 +2022,5 @@ const struct address_space_operations ocfs2_aops = {
.releasepage = ocfs2_releasepage,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 81eff8e5832..01cf8cc3d28 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -30,7 +30,6 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 75997b4deaf..ca96bce50e1 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -30,7 +30,6 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index c5c88124096..ca46002ec10 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -27,7 +27,6 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/sysctl.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 4d9e6b288dd..0334000676d 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -28,7 +28,6 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 83a9f2972ac..437698e9465 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -30,7 +30,6 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index f8b653fcd4d..83bcaf266b3 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -30,7 +30,6 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 43e6e328056..d9fa3d22e17 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -30,7 +30,6 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 98569e86c61..52ec020ea78 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -30,7 +30,6 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 756f5b0998e..00f53b2aea7 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -30,7 +30,6 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 24feb449a1d..4cc3c890a2c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -28,7 +28,6 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/statfs.h>
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 579dd1b1110..e3421030a69 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -38,7 +38,6 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
-#include <linux/utsname.h>
#include <linux/namei.h>
#define MLOG_MASK_PREFIX ML_NAMEI
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 171e052c07b..c7bff4f603f 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -97,7 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
"Committed_AS: %8lu kB\n"
"VmallocTotal: %8lu kB\n"
"VmallocUsed: %8lu kB\n"
- "VmallocChunk: %8lu kB\n",
+ "VmallocChunk: %8lu kB\n"
+#ifdef CONFIG_MEMORY_FAILURE
+ "HardwareCorrupted: %8lu kB\n"
+#endif
+ ,
K(i.totalram),
K(i.freeram),
K(i.bufferram),
@@ -144,6 +148,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
(unsigned long)VMALLOC_TOTAL >> 10,
vmi.used >> 10,
vmi.largest_chunk >> 10
+#ifdef CONFIG_MEMORY_FAILURE
+ ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10)
+#endif
);
hugetlb_report_meminfo(m);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 9b1e4e9a16b..f667e8aeabd 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
/* careful: calling conventions are nasty here */
res = count;
- error = table->proc_handler(table, write, filp, buf, &res, ppos);
+ error = table->proc_handler(table, write, buf, &res, ppos);
if (!error)
error = res;
out:
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 0c10a0b3f14..766b1d45605 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -4,13 +4,18 @@
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/time.h>
+#include <linux/kernel_stat.h>
#include <asm/cputime.h>
static int uptime_proc_show(struct seq_file *m, void *v)
{
struct timespec uptime;
struct timespec idle;
- cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
+ int i;
+ cputime_t idletime = cputime_zero;
+
+ for_each_possible_cpu(i)
+ idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
do_posix_clock_monotonic_gettime(&uptime);
monotonic_to_bootbased(&uptime);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 11f0c06316d..32fae4040eb 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -69,14 +69,11 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
/* make various checks */
order = get_order(newsize);
if (unlikely(order >= MAX_ORDER))
- goto too_big;
+ return -EFBIG;
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && newsize > limit)
- goto fsize_exceeded;
-
- if (newsize > inode->i_sb->s_maxbytes)
- goto too_big;
+ ret = inode_newsize_ok(inode, newsize);
+ if (ret)
+ return ret;
i_size_write(inode, newsize);
@@ -118,12 +115,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
return 0;
- fsize_exceeded:
- send_sig(SIGXFSZ, current, 0);
- too_big:
- return -EFBIG;
-
- add_error:
+add_error:
while (loop < npages)
__free_page(pages + loop++);
return ret;
diff --git a/fs/read_write.c b/fs/read_write.c
index 6c8c55dec2b..3ac28987f22 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -839,9 +839,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
pos = *ppos;
- retval = -EINVAL;
- if (unlikely(pos < 0))
- goto fput_out;
if (unlikely(pos + count > max)) {
retval = -EOVERFLOW;
if (pos >= max)
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 47f132df0c3..c117fa80d1e 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -528,7 +528,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK;
root = romfs_iget(sb, pos);
- if (!root)
+ if (IS_ERR(root))
goto error;
sb->s_root = d_alloc_root(root);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 6c959275f2d..eae7d9dbf3f 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path);
*/
int seq_path(struct seq_file *m, struct path *path, char *esc)
{
- if (m->count < m->size) {
- char *s = m->buf + m->count;
- char *p = d_path(path, s, m->size - m->count);
+ char *buf;
+ size_t size = seq_get_buf(m, &buf);
+ int res = -1;
+
+ if (size) {
+ char *p = d_path(path, buf, size);
if (!IS_ERR(p)) {
- s = mangle_path(s, p, esc);
- if (s) {
- p = m->buf + m->count;
- m->count = s - m->buf;
- return s - p;
- }
+ char *end = mangle_path(buf, p, esc);
+ if (end)
+ res = end - buf;
}
}
- m->count = m->size;
- return -1;
+ seq_commit(m, res);
+
+ return res;
}
EXPORT_SYMBOL(seq_path);
@@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path);
int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
char *esc)
{
- int err = -ENAMETOOLONG;
- if (m->count < m->size) {
- char *s = m->buf + m->count;
+ char *buf;
+ size_t size = seq_get_buf(m, &buf);
+ int res = -ENAMETOOLONG;
+
+ if (size) {
char *p;
spin_lock(&dcache_lock);
- p = __d_path(path, root, s, m->size - m->count);
+ p = __d_path(path, root, buf, size);
spin_unlock(&dcache_lock);
- err = PTR_ERR(p);
+ res = PTR_ERR(p);
if (!IS_ERR(p)) {
- s = mangle_path(s, p, esc);
- if (s) {
- p = m->buf + m->count;
- m->count = s - m->buf;
- return 0;
- }
+ char *end = mangle_path(buf, p, esc);
+ if (end)
+ res = end - buf;
+ else
+ res = -ENAMETOOLONG;
}
}
- m->count = m->size;
- return err;
+ seq_commit(m, res);
+
+ return res < 0 ? res : 0;
}
/*
@@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
*/
int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
{
- if (m->count < m->size) {
- char *s = m->buf + m->count;
- char *p = dentry_path(dentry, s, m->size - m->count);
+ char *buf;
+ size_t size = seq_get_buf(m, &buf);
+ int res = -1;
+
+ if (size) {
+ char *p = dentry_path(dentry, buf, size);
if (!IS_ERR(p)) {
- s = mangle_path(s, p, esc);
- if (s) {
- p = m->buf + m->count;
- m->count = s - m->buf;
- return s - p;
- }
+ char *end = mangle_path(buf, p, esc);
+ if (end)
+ res = end - buf;
}
}
- m->count = m->size;
- return -1;
+ seq_commit(m, res);
+
+ return res;
}
int seq_bitmap(struct seq_file *m, const unsigned long *bits,
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 1402d2d54f5..1c4c8f08997 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -459,14 +459,8 @@ smb_show_options(struct seq_file *s, struct vfsmount *m)
static void
smb_unload_nls(struct smb_sb_info *server)
{
- if (server->remote_nls) {
- unload_nls(server->remote_nls);
- server->remote_nls = NULL;
- }
- if (server->local_nls) {
- unload_nls(server->local_nls);
- server->local_nls = NULL;
- }
+ unload_nls(server->remote_nls);
+ unload_nls(server->local_nls);
}
static void
diff --git a/fs/super.c b/fs/super.c
index 0e7207b9815..19eb70b374b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -465,6 +465,48 @@ rescan:
}
EXPORT_SYMBOL(get_super);
+
+/**
+ * get_active_super - get an active reference to the superblock of a device
+ * @bdev: device to get the superblock for
+ *
+ * Scans the superblock list and finds the superblock of the file system
+ * mounted on the device given. Returns the superblock with an active
+ * reference and s_umount held exclusively or %NULL if none was found.
+ */
+struct super_block *get_active_super(struct block_device *bdev)
+{
+ struct super_block *sb;
+
+ if (!bdev)
+ return NULL;
+
+ spin_lock(&sb_lock);
+ list_for_each_entry(sb, &super_blocks, s_list) {
+ if (sb->s_bdev != bdev)
+ continue;
+
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+ down_write(&sb->s_umount);
+ if (sb->s_root) {
+ spin_lock(&sb_lock);
+ if (sb->s_count > S_BIAS) {
+ atomic_inc(&sb->s_active);
+ sb->s_count--;
+ spin_unlock(&sb_lock);
+ return sb;
+ }
+ spin_unlock(&sb_lock);
+ }
+ up_write(&sb->s_umount);
+ put_super(sb);
+ yield();
+ spin_lock(&sb_lock);
+ }
+ spin_unlock(&sb_lock);
+ return NULL;
+}
struct super_block * user_get_super(dev_t dev)
{
@@ -527,11 +569,15 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
{
int retval;
int remount_rw;
-
+
+ if (sb->s_frozen != SB_UNFROZEN)
+ return -EBUSY;
+
#ifdef CONFIG_BLOCK
if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
return -EACCES;
#endif
+
if (flags & MS_RDONLY)
acct_auto_close(sb);
shrink_dcache_sb(sb);
@@ -743,9 +789,14 @@ int get_sb_bdev(struct file_system_type *fs_type,
* will protect the lockfs code from trying to start a snapshot
* while we are mounting
*/
- down(&bdev->bd_mount_sem);
+ mutex_lock(&bdev->bd_fsfreeze_mutex);
+ if (bdev->bd_fsfreeze_count > 0) {
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ error = -EBUSY;
+ goto error_bdev;
+ }
s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
- up(&bdev->bd_mount_sem);
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
if (IS_ERR(s))
goto error_s;
@@ -892,6 +943,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
if (error)
goto out_sb;
+ /*
+ * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
+ * but s_maxbytes was an unsigned long long for many releases. Throw
+ * this warning for a little while to try and catch filesystems that
+ * violate this rule. This warning should be either removed or
+ * converted to a BUG() in 2.6.34.
+ */
+ WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
+ "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
+
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
up_write(&mnt->mnt_sb->s_umount);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d5e5559e31d..381854461b2 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1635,4 +1635,5 @@ const struct address_space_operations xfs_address_space_operations = {
.direct_IO = xfs_vm_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 916c0ffb608..c5bc67c4e3b 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -26,7 +26,6 @@ STATIC int
xfs_stats_clear_proc_handler(
ctl_table *ctl,
int write,
- struct file *filp,
void __user *buffer,
size_t *lenp,
loff_t *ppos)
@@ -34,7 +33,7 @@ xfs_stats_clear_proc_handler(
int c, ret, *valp = ctl->data;
__uint32_t vn_active;
- ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
if (!ret && write && *valp) {
printk("XFS Clearing xfsstats\n");
diff --git a/include/acpi/button.h b/include/acpi/button.h
new file mode 100644
index 00000000000..97eea0e4c01
--- /dev/null
+++ b/include/acpi/button.h
@@ -0,0 +1,25 @@
+#ifndef ACPI_BUTTON_H
+#define ACPI_BUTTON_H
+
+#include <linux/notifier.h>
+
+#if defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE)
+extern int acpi_lid_notifier_register(struct notifier_block *nb);
+extern int acpi_lid_notifier_unregister(struct notifier_block *nb);
+extern int acpi_lid_open(void);
+#else
+static inline int acpi_lid_notifier_register(struct notifier_block *nb)
+{
+ return 0;
+}
+static inline int acpi_lid_notifier_unregister(struct notifier_block *nb)
+{
+ return 0;
+}
+static inline int acpi_lid_open(void)
+{
+ return 1;
+}
+#endif /* defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) */
+
+#endif /* ACPI_BUTTON_H */
diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
index 4d3e48373e7..0c3dd860392 100644
--- a/include/asm-generic/fcntl.h
+++ b/include/asm-generic/fcntl.h
@@ -73,6 +73,19 @@
#define F_SETSIG 10 /* for sockets. */
#define F_GETSIG 11 /* for sockets. */
#endif
+#ifndef F_SETOWN_EX
+#define F_SETOWN_EX 12
+#define F_GETOWN_EX 13
+#endif
+
+#define F_OWNER_TID 0
+#define F_OWNER_PID 1
+#define F_OWNER_GID 2
+
+struct f_owner_ex {
+ int type;
+ pid_t pid;
+};
/* for F_[GET|SET]FL */
#define FD_CLOEXEC 1 /* actually anything with low bit set goes */
diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h
index dd63bd38864..5ee13b2fd22 100644
--- a/include/asm-generic/mman-common.h
+++ b/include/asm-generic/mman-common.h
@@ -34,6 +34,7 @@
#define MADV_REMOVE 9 /* remove these pages & resources */
#define MADV_DONTFORK 10 /* don't inherit across fork */
#define MADV_DOFORK 11 /* do inherit across fork */
+#define MADV_HWPOISON 100 /* poison a page for testing */
#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index c840719a8c5..942d30b5aab 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -82,6 +82,7 @@ typedef struct siginfo {
#ifdef __ARCH_SI_TRAPNO
int _trapno; /* TRAP # which caused the signal */
#endif
+ short _addr_lsb; /* LSB of the reported address */
} _sigfault;
/* SIGPOLL */
@@ -112,6 +113,7 @@ typedef struct siginfo {
#ifdef __ARCH_SI_TRAPNO
#define si_trapno _sifields._sigfault._trapno
#endif
+#define si_addr_lsb _sifields._sigfault._addr_lsb
#define si_band _sifields._sigpoll._band
#define si_fd _sifields._sigpoll._fd
@@ -192,7 +194,11 @@ typedef struct siginfo {
#define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */
#define BUS_ADRERR (__SI_FAULT|2) /* non-existant physical address */
#define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */
-#define NSIGBUS 3
+/* hardware memory error consumed on a machine check: action required */
+#define BUS_MCEERR_AR (__SI_FAULT|4)
+/* hardware memory error detected in process but not consumed: action optional */
+#define BUS_MCEERR_AO (__SI_FAULT|5)
+#define NSIGBUS 5
/*
* SIGTRAP si_codes
diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
index 88bada2ebc4..510df36dd5d 100644
--- a/include/asm-generic/topology.h
+++ b/include/asm-generic/topology.h
@@ -37,9 +37,6 @@
#ifndef parent_node
#define parent_node(node) ((void)(node),0)
#endif
-#ifndef node_to_cpumask
-#define node_to_cpumask(node) ((void)node, cpu_online_map)
-#endif
#ifndef cpumask_of_node
#define cpumask_of_node(node) ((void)node, cpu_online_mask)
#endif
@@ -55,18 +52,4 @@
#endif /* CONFIG_NUMA */
-/*
- * returns pointer to cpumask for specified node
- * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
- */
-#ifndef node_to_cpumask_ptr
-
-#define node_to_cpumask_ptr(v, node) \
- cpumask_t _##v = node_to_cpumask(node); \
- const cpumask_t *v = &_##v
-
-#define node_to_cpumask_ptr_next(v, node) \
- _##v = node_to_cpumask(node)
-#endif
-
#endif /* _ASM_GENERIC_TOPOLOGY_H */
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 853508499d2..3f6e545609b 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -552,6 +552,7 @@
{0x8086, 0x2e12, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
{0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
{0x8086, 0x2e32, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
+ {0x8086, 0x2e42, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
{0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
{0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
{0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 8e1e92583fb..7e0cb1da92e 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -185,6 +185,7 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_GEM_GET_APERTURE 0x23
#define DRM_I915_GEM_MMAP_GTT 0x24
#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25
+#define DRM_I915_GEM_MADVISE 0x26
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -221,6 +222,7 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling)
#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_intel_get_pipe_from_crtc_id)
+#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
@@ -667,4 +669,21 @@ struct drm_i915_get_pipe_from_crtc_id {
__u32 pipe;
};
+#define I915_MADV_WILLNEED 0
+#define I915_MADV_DONTNEED 1
+#define __I915_MADV_PURGED 2 /* internal state */
+
+struct drm_i915_gem_madvise {
+ /** Handle of the buffer to change the backing store advice */
+ __u32 handle;
+
+ /* Advice: either the buffer will be needed again in the near future,
+ * or won't be and could be discarded under memory pressure.
+ */
+ __u32 madv;
+
+ /** Whether the backing store still exists. */
+ __u32 retained;
+};
+
#endif /* _I915_DRM_H_ */
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 5fc2ef8d97f..a1c486a88e8 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -58,25 +58,60 @@ struct dma_chan_ref {
* array.
* @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
* dependency chain
- * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining.
+ * @ASYNC_TX_FENCE: specify that the next operation in the dependency
+ * chain uses this operation's result as an input
*/
enum async_tx_flags {
ASYNC_TX_XOR_ZERO_DST = (1 << 0),
ASYNC_TX_XOR_DROP_DST = (1 << 1),
- ASYNC_TX_ACK = (1 << 3),
- ASYNC_TX_DEP_ACK = (1 << 4),
+ ASYNC_TX_ACK = (1 << 2),
+ ASYNC_TX_FENCE = (1 << 3),
+};
+
+/**
+ * struct async_submit_ctl - async_tx submission/completion modifiers
+ * @flags: submission modifiers
+ * @depend_tx: parent dependency of the current operation being submitted
+ * @cb_fn: callback routine to run at operation completion
+ * @cb_param: parameter for the callback routine
+ * @scribble: caller provided space for dma/page address conversions
+ */
+struct async_submit_ctl {
+ enum async_tx_flags flags;
+ struct dma_async_tx_descriptor *depend_tx;
+ dma_async_tx_callback cb_fn;
+ void *cb_param;
+ void *scribble;
};
#ifdef CONFIG_DMA_ENGINE
#define async_tx_issue_pending_all dma_issue_pending_all
+
+/**
+ * async_tx_issue_pending - send pending descriptor to the hardware channel
+ * @tx: descriptor handle to retrieve hardware context
+ *
+ * Note: any dependent operations will have already been issued by
+ * async_tx_channel_switch, or (in the case of no channel switch) will
+ * be already pending on this channel.
+ */
+static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
+{
+ if (likely(tx)) {
+ struct dma_chan *chan = tx->chan;
+ struct dma_device *dma = chan->device;
+
+ dma->device_issue_pending(chan);
+ }
+}
#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
#include <asm/async_tx.h>
#else
#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
__async_tx_find_channel(dep, type)
struct dma_chan *
-__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
- enum dma_transaction_type tx_type);
+__async_tx_find_channel(struct async_submit_ctl *submit,
+ enum dma_transaction_type tx_type);
#endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */
#else
static inline void async_tx_issue_pending_all(void)
@@ -84,10 +119,16 @@ static inline void async_tx_issue_pending_all(void)
do { } while (0);
}
+static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
+{
+ do { } while (0); /* !CONFIG_DMA_ENGINE stub: intentionally does nothing */
+}
+
static inline struct dma_chan *
-async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
- enum dma_transaction_type tx_type, struct page **dst, int dst_count,
- struct page **src, int src_count, size_t len)
+async_tx_find_channel(struct async_submit_ctl *submit,
+ enum dma_transaction_type tx_type, struct page **dst,
+ int dst_count, struct page **src, int src_count,
+ size_t len)
{
return NULL;
}
@@ -99,46 +140,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
* @cb_fn_param: parameter to pass to the callback routine
*/
static inline void
-async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param)
+async_tx_sync_epilog(struct async_submit_ctl *submit)
{
- if (cb_fn)
- cb_fn(cb_fn_param);
+ if (submit->cb_fn)
+ submit->cb_fn(submit->cb_param);
}
-void
-async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
- enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+typedef union { /* one slot that can be read as any of the three forms below */
+ unsigned long addr; /* address held as a plain integer */
+ struct page *page; /* page pointer form */
+ dma_addr_t dma; /* dma_addr_t form */
+} addr_conv_t; /* element type of the scribble buffer taken by init_async_submit() */
+
+/**
+ * init_async_submit - fill in an async_submit_ctl in a single call
+ * @args: submission control block to initialize
+ * @flags: value stored in @args->flags
+ * @tx: value stored in @args->depend_tx
+ * @cb_fn: value stored in @args->cb_fn
+ * @cb_param: value stored in @args->cb_param
+ * @scribble: value stored in @args->scribble
+ *
+ * Every member of @args is overwritten; nothing is read from it.
+ */
+static inline void
+init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags,
+		  struct dma_async_tx_descriptor *tx,
+		  dma_async_tx_callback cb_fn, void *cb_param,
+		  addr_conv_t *scribble)
+{
+	*args = (struct async_submit_ctl) {
+		.flags		= flags,
+		.depend_tx	= tx,
+		.cb_fn		= cb_fn,
+		.cb_param	= cb_param,
+		.scribble	= scribble,
+	};
+}
+
+void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
+ struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
- int src_cnt, size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+ int src_cnt, size_t len, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
-async_xor_zero_sum(struct page *dest, struct page **src_list,
- unsigned int offset, int src_cnt, size_t len,
- u32 *result, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
+ int src_cnt, size_t len, enum sum_check_flags *result,
+ struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
- unsigned int src_offset, size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+ unsigned int src_offset, size_t len,
+ struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_memset(struct page *dest, int val, unsigned int offset,
- size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+ size_t len, struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
+ size_t len, struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
+ size_t len, enum sum_check_flags *pqres, struct page *spare,
+ struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
+ struct page **ptrs, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
-async_trigger_callback(enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+async_raid6_datap_recov(int src_num, size_t bytes, int faila,
+ struct page **ptrs, struct async_submit_ctl *submit);
void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
#endif /* _ASYNC_TX_H_ */
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 2046b5b8af4..aece486ac73 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -120,7 +120,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
extern int prepare_bprm_creds(struct linux_binprm *bprm);
extern void install_exec_creds(struct linux_binprm *bprm);
extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
-extern int set_binfmt(struct linux_binfmt *new);
+extern void set_binfmt(struct linux_binfmt *new);
extern void free_bprm(struct linux_binprm *);
#endif /* __KERNEL__ */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 90bba9e6228..b62bb9294d0 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -141,6 +141,38 @@ enum {
CGRP_WAIT_ON_RMDIR,
};
+/* which pidlist file are we talking about? */
+enum cgroup_filetype {
+ CGROUP_FILE_PROCS,
+ CGROUP_FILE_TASKS,
+};
+
+/*
+ * A pidlist is a list of pids that virtually represents the contents of one
+ * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
+ * a pair (one each for procs, tasks) for each pid namespace that's relevant
+ * to the cgroup.
+ */
+struct cgroup_pidlist {
+ /*
+ * used to find which pidlist is wanted. doesn't change as long as
+ * this particular list stays in the list.
+ */
+ struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
+ /* array of xids */
+ pid_t *list;
+ /* how many elements the above list has */
+ int length;
+ /* how many files are using the current array */
+ int use_count;
+ /* each of these stored in a list by its cgroup */
+ struct list_head links;
+ /* pointer to the cgroup we belong to, for list removal purposes */
+ struct cgroup *owner;
+ /* protects the other fields */
+ struct rw_semaphore mutex;
+};
+
struct cgroup {
unsigned long flags; /* "unsigned long" so bitops work */
@@ -179,11 +211,12 @@ struct cgroup {
*/
struct list_head release_list;
- /* pids_mutex protects pids_list and cached pid arrays. */
- struct rw_semaphore pids_mutex;
-
- /* Linked list of struct cgroup_pids */
- struct list_head pids_list;
+ /*
+ * list of pidlists, up to two for each namespace (one for procs, one
+ * for tasks); created on demand.
+ */
+ struct list_head pidlists;
+ struct mutex pidlist_mutex;
/* For RCU-protected deletion */
struct rcu_head rcu_head;
@@ -227,6 +260,9 @@ struct css_set {
* during subsystem registration (at boot time).
*/
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+ /* For RCU-protected deletion */
+ struct rcu_head rcu_head;
};
/*
@@ -389,10 +425,11 @@ struct cgroup_subsys {
struct cgroup *cgrp);
int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
- int (*can_attach)(struct cgroup_subsys *ss,
- struct cgroup *cgrp, struct task_struct *tsk);
+ int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct task_struct *tsk, bool threadgroup);
void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cgrp, struct task_struct *tsk);
+ struct cgroup *old_cgrp, struct task_struct *tsk,
+ bool threadgroup);
void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
int (*populate)(struct cgroup_subsys *ss,
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 7f627775c94..ddb7a97c78c 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -27,8 +27,8 @@
*
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*
- * Please read Documentation/filesystems/configfs.txt before using the
- * configfs interface, ESPECIALLY the parts about reference counts and
+ * Please read Documentation/filesystems/configfs/configfs.txt before using
+ * the configfs interface, ESPECIALLY the parts about reference counts and
* item destructors.
*/
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9b1d458aac6..789cf5f920c 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -3,444 +3,37 @@
/*
* Cpumasks provide a bitmap suitable for representing the
- * set of CPU's in a system, one bit position per CPU number.
- *
- * The new cpumask_ ops take a "struct cpumask *"; the old ones
- * use cpumask_t.
- *
- * See detailed comments in the file linux/bitmap.h describing the
- * data type on which these cpumasks are based.
- *
- * For details of cpumask_scnprintf() and cpumask_parse_user(),
- * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c.
- * For details of cpulist_scnprintf() and cpulist_parse(), see
- * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c.
- * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c
- * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c.
- * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c.
- * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c.
- *
- * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
- * Note: The alternate operations with the suffix "_nr" are used
- * to limit the range of the loop to nr_cpu_ids instead of
- * NR_CPUS when NR_CPUS > 64 for performance reasons.
- * If NR_CPUS is <= 64 then most assembler bitmask
- * operators execute faster with a constant range, so
- * the operator will continue to use NR_CPUS.
- *
- * Another consideration is that nr_cpu_ids is initialized
- * to NR_CPUS and isn't lowered until the possible cpus are
- * discovered (including any disabled cpus). So early uses
- * will span the entire range of NR_CPUS.
- * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
- *
- * The obsolescent cpumask operations are:
- *
- * void cpu_set(cpu, mask) turn on bit 'cpu' in mask
- * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask
- * void cpus_setall(mask) set all bits
- * void cpus_clear(mask) clear all bits
- * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask
- * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask
- *
- * int cpus_and(dst, src1, src2) dst = src1 & src2 [intersection]
- * void cpus_or(dst, src1, src2) dst = src1 | src2 [union]
- * void cpus_xor(dst, src1, src2) dst = src1 ^ src2
- * int cpus_andnot(dst, src1, src2) dst = src1 & ~src2
- * void cpus_complement(dst, src) dst = ~src
- *
- * int cpus_equal(mask1, mask2) Does mask1 == mask2?
- * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect?
- * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2?
- * int cpus_empty(mask) Is mask empty (no bits sets)?
- * int cpus_full(mask) Is mask full (all bits sets)?
- * int cpus_weight(mask) Hamming weigh - number of set bits
- * int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS
- *
- * void cpus_shift_right(dst, src, n) Shift right
- * void cpus_shift_left(dst, src, n) Shift left
- *
- * int first_cpu(mask) Number lowest set bit, or NR_CPUS
- * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS
- * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids
- *
- * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set
- * (can be used as an lvalue)
- * CPU_MASK_ALL Initializer - all bits set
- * CPU_MASK_NONE Initializer - no bits set
- * unsigned long *cpus_addr(mask) Array of unsigned long's in mask
- *
- * CPUMASK_ALLOC kmalloc's a structure that is a composite of many cpumask_t
- * variables, and CPUMASK_PTR provides pointers to each field.
- *
- * The structure should be defined something like this:
- * struct my_cpumasks {
- * cpumask_t mask1;
- * cpumask_t mask2;
- * };
- *
- * Usage is then:
- * CPUMASK_ALLOC(my_cpumasks);
- * CPUMASK_PTR(mask1, my_cpumasks);
- * CPUMASK_PTR(mask2, my_cpumasks);
- *
- * --- DO NOT reference cpumask_t pointers until this check ---
- * if (my_cpumasks == NULL)
- * "kmalloc failed"...
- *
- * References are now pointers to the cpumask_t variables (*mask1, ...)
- *
- *if NR_CPUS > BITS_PER_LONG
- * CPUMASK_ALLOC(m) Declares and allocates struct m *m =
- * kmalloc(sizeof(*m), GFP_KERNEL)
- * CPUMASK_FREE(m) Macro for kfree(m)
- *else
- * CPUMASK_ALLOC(m) Declares struct m _m, *m = &_m
- * CPUMASK_FREE(m) Nop
- *endif
- * CPUMASK_PTR(v, m) Declares cpumask_t *v = &(m->v)
- * ------------------------------------------------------------------------
- *
- * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
- * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask
- * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing
- * int cpulist_parse(buf, map) Parse ascii string as cpulist
- * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit)
- * void cpus_remap(dst, src, old, new) *dst = map(old, new)(src)
- * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap
- * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz
- *
- * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS
- * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids
- *
- * int num_online_cpus() Number of online CPUs
- * int num_possible_cpus() Number of all possible CPUs
- * int num_present_cpus() Number of present CPUs
- *
- * int cpu_online(cpu) Is some cpu online?
- * int cpu_possible(cpu) Is some cpu possible?
- * int cpu_present(cpu) Is some cpu present (can schedule)?
- *
- * int any_online_cpu(mask) First online cpu in mask
- *
- * for_each_possible_cpu(cpu) for-loop cpu over cpu_possible_map
- * for_each_online_cpu(cpu) for-loop cpu over cpu_online_map
- * for_each_present_cpu(cpu) for-loop cpu over cpu_present_map
- *
- * Subtlety:
- * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway)
- * to generate slightly worse code. Note for example the additional
- * 40 lines of assembly code compiling the "for each possible cpu"
- * loops buried in the disk_stat_read() macros calls when compiling
- * drivers/block/genhd.c (arch i386, CONFIG_SMP=y). So use a simple
- * one-line #define for cpu_isset(), instead of wrapping an inline
- * inside a macro, the way we do the other calls.
+ * set of CPU's in a system, one bit position per CPU number. In general,
+ * only nr_cpu_ids (<= NR_CPUS) bits are valid.
*/
-
#include <linux/kernel.h>
#include <linux/threads.h>
#include <linux/bitmap.h>
typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
-extern cpumask_t _unused_cpumask_arg_;
-
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
-static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
-{
- set_bit(cpu, dstp->bits);
-}
-
-#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
-static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
-{
- clear_bit(cpu, dstp->bits);
-}
-
-#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
-static inline void __cpus_setall(cpumask_t *dstp, int nbits)
-{
- bitmap_fill(dstp->bits, nbits);
-}
-
-#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
-static inline void __cpus_clear(cpumask_t *dstp, int nbits)
-{
- bitmap_zero(dstp->bits, nbits);
-}
-
-/* No static inline type checking - see Subtlety (1) above. */
-#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
-
-#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
-static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
-{
- return test_and_set_bit(cpu, addr->bits);
-}
-
-#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
-static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS)
-static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS)
-static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_andnot(dst, src1, src2) \
- __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS)
-static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS)
-static inline void __cpus_complement(cpumask_t *dstp,
- const cpumask_t *srcp, int nbits)
-{
- bitmap_complement(dstp->bits, srcp->bits, nbits);
-}
-
-#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
-static inline int __cpus_equal(const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- return bitmap_equal(src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS)
-static inline int __cpus_intersects(const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- return bitmap_intersects(src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS)
-static inline int __cpus_subset(const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- return bitmap_subset(src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
-static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
-{
- return bitmap_empty(srcp->bits, nbits);
-}
-
-#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS)
-static inline int __cpus_full(const cpumask_t *srcp, int nbits)
-{
- return bitmap_full(srcp->bits, nbits);
-}
-
-#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
-static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
-{
- return bitmap_weight(srcp->bits, nbits);
-}
-
-#define cpus_shift_right(dst, src, n) \
- __cpus_shift_right(&(dst), &(src), (n), NR_CPUS)
-static inline void __cpus_shift_right(cpumask_t *dstp,
- const cpumask_t *srcp, int n, int nbits)
-{
- bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
-}
-
-#define cpus_shift_left(dst, src, n) \
- __cpus_shift_left(&(dst), &(src), (n), NR_CPUS)
-static inline void __cpus_shift_left(cpumask_t *dstp,
- const cpumask_t *srcp, int n, int nbits)
-{
- bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
-}
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
/**
- * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
- * @bitmap: the bitmap
- *
- * There are a few places where cpumask_var_t isn't appropriate and
- * static cpumasks must be used (eg. very early boot), yet we don't
- * expose the definition of 'struct cpumask'.
- *
- * This does the conversion, and can be used as a constant initializer.
- */
-#define to_cpumask(bitmap) \
- ((struct cpumask *)(1 ? (bitmap) \
- : (void *)sizeof(__check_is_bitmap(bitmap))))
-
-static inline int __check_is_bitmap(const unsigned long *bitmap)
-{
- return 1;
-}
-
-/*
- * Special-case data structure for "single bit set only" constant CPU masks.
+ * cpumask_bits - get the bits in a cpumask
+ * @maskp: the struct cpumask *
*
- * We pre-generate all the 64 (or 32) possible bit positions, with enough
- * padding to the left and the right, and return the constant pointer
- * appropriately offset.
- */
-extern const unsigned long
- cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
-
-static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
-{
- const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
- p -= cpu / BITS_PER_LONG;
- return to_cpumask(p);
-}
-
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-/*
- * In cases where we take the address of the cpumask immediately,
- * gcc optimizes it out (it's a constant) and there's no huge stack
- * variable created:
+ * You should only assume nr_cpu_ids bits of this mask are valid. This is
+ * a macro so it's const-correct.
*/
-#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu))
-
-
-#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
-
-#if NR_CPUS <= BITS_PER_LONG
-
-#define CPU_MASK_ALL \
-(cpumask_t) { { \
- [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
-} }
-
-#define CPU_MASK_ALL_PTR (&CPU_MASK_ALL)
-
-#else
-
-#define CPU_MASK_ALL \
-(cpumask_t) { { \
- [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
- [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
-} }
-
-/* cpu_mask_all is in init/main.c */
-extern cpumask_t cpu_mask_all;
-#define CPU_MASK_ALL_PTR (&cpu_mask_all)
-
-#endif
-
-#define CPU_MASK_NONE \
-(cpumask_t) { { \
- [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \
-} }
-
-#define CPU_MASK_CPU0 \
-(cpumask_t) { { \
- [0] = 1UL \
-} }
-
-#define cpus_addr(src) ((src).bits)
-
-#if NR_CPUS > BITS_PER_LONG
-#define CPUMASK_ALLOC(m) struct m *m = kmalloc(sizeof(*m), GFP_KERNEL)
-#define CPUMASK_FREE(m) kfree(m)
-#else
-#define CPUMASK_ALLOC(m) struct m _m, *m = &_m
-#define CPUMASK_FREE(m)
-#endif
-#define CPUMASK_PTR(v, m) cpumask_t *v = &(m->v)
-
-#define cpu_remap(oldbit, old, new) \
- __cpu_remap((oldbit), &(old), &(new), NR_CPUS)
-static inline int __cpu_remap(int oldbit,
- const cpumask_t *oldp, const cpumask_t *newp, int nbits)
-{
- return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits);
-}
-
-#define cpus_remap(dst, src, old, new) \
- __cpus_remap(&(dst), &(src), &(old), &(new), NR_CPUS)
-static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp,
- const cpumask_t *oldp, const cpumask_t *newp, int nbits)
-{
- bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits);
-}
-
-#define cpus_onto(dst, orig, relmap) \
- __cpus_onto(&(dst), &(orig), &(relmap), NR_CPUS)
-static inline void __cpus_onto(cpumask_t *dstp, const cpumask_t *origp,
- const cpumask_t *relmapp, int nbits)
-{
- bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits);
-}
-
-#define cpus_fold(dst, orig, sz) \
- __cpus_fold(&(dst), &(orig), sz, NR_CPUS)
-static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
- int sz, int nbits)
-{
- bitmap_fold(dstp->bits, origp->bits, sz, nbits);
-}
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
+#define cpumask_bits(maskp) ((maskp)->bits)
#if NR_CPUS == 1
-
#define nr_cpu_ids 1
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-#define first_cpu(src) ({ (void)(src); 0; })
-#define next_cpu(n, src) ({ (void)(src); 1; })
-#define any_online_cpu(mask) 0
-#define for_each_cpu_mask(cpu, mask) \
- for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
-#else /* NR_CPUS > 1 */
-
+#else
extern int nr_cpu_ids;
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-int __first_cpu(const cpumask_t *srcp);
-int __next_cpu(int n, const cpumask_t *srcp);
-int __any_online_cpu(const cpumask_t *mask);
-
-#define first_cpu(src) __first_cpu(&(src))
-#define next_cpu(n, src) __next_cpu((n), &(src))
-#define any_online_cpu(mask) __any_online_cpu(&(mask))
-#define for_each_cpu_mask(cpu, mask) \
- for ((cpu) = -1; \
- (cpu) = next_cpu((cpu), (mask)), \
- (cpu) < NR_CPUS; )
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
#endif
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-#if NR_CPUS <= 64
-
-#define next_cpu_nr(n, src) next_cpu(n, src)
-#define cpus_weight_nr(cpumask) cpus_weight(cpumask)
-#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask)
-
-#else /* NR_CPUS > 64 */
-
-int __next_cpu_nr(int n, const cpumask_t *srcp);
-#define next_cpu_nr(n, src) __next_cpu_nr((n), &(src))
-#define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids)
-#define for_each_cpu_mask_nr(cpu, mask) \
- for ((cpu) = -1; \
- (cpu) = next_cpu_nr((cpu), (mask)), \
- (cpu) < nr_cpu_ids; )
-
-#endif /* NR_CPUS > 64 */
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also,
+ * not all bits may be allocated. */
+#define nr_cpumask_bits nr_cpu_ids
+#else
+#define nr_cpumask_bits NR_CPUS
+#endif
/*
* The following particular system cpumasks and operations manage
@@ -487,12 +80,6 @@ extern const struct cpumask *const cpu_online_mask;
extern const struct cpumask *const cpu_present_mask;
extern const struct cpumask *const cpu_active_mask;
-/* These strip const, as traditionally they weren't const. */
-#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask)
-#define cpu_online_map (*(cpumask_t *)cpu_online_mask)
-#define cpu_present_map (*(cpumask_t *)cpu_present_mask)
-#define cpu_active_map (*(cpumask_t *)cpu_active_mask)
-
#if NR_CPUS > 1
#define num_online_cpus() cpumask_weight(cpu_online_mask)
#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
@@ -511,35 +98,6 @@ extern const struct cpumask *const cpu_active_mask;
#define cpu_active(cpu) ((cpu) == 0)
#endif
-#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
-
-/* These are the new versions of the cpumask operators: passed by pointer.
- * The older versions will be implemented in terms of these, then deleted. */
-#define cpumask_bits(maskp) ((maskp)->bits)
-
-#if NR_CPUS <= BITS_PER_LONG
-#define CPU_BITS_ALL \
-{ \
- [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
-}
-
-#else /* NR_CPUS > BITS_PER_LONG */
-
-#define CPU_BITS_ALL \
-{ \
- [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
- [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
-}
-#endif /* NR_CPUS > BITS_PER_LONG */
-
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also,
- * not all bits may be allocated. */
-#define nr_cpumask_bits nr_cpu_ids
-#else
-#define nr_cpumask_bits NR_CPUS
-#endif
-
/* verify cpu argument to cpumask_* operators */
static inline unsigned int cpumask_check(unsigned int cpu)
{
@@ -1100,4 +658,241 @@ void set_cpu_active(unsigned int cpu, bool active);
void init_cpu_present(const struct cpumask *src);
void init_cpu_possible(const struct cpumask *src);
void init_cpu_online(const struct cpumask *src);
+
+/**
+ * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
+ * @bitmap: the bitmap
+ *
+ * There are a few places where cpumask_var_t isn't appropriate and
+ * static cpumasks must be used (eg. very early boot), yet we don't
+ * expose the definition of 'struct cpumask'.
+ *
+ * This does the conversion, and can be used as a constant initializer.
+ */
+#define to_cpumask(bitmap) \
+ ((struct cpumask *)(1 ? (bitmap) \
+ : (void *)sizeof(__check_is_bitmap(bitmap))))
+
+static inline int __check_is_bitmap(const unsigned long *bitmap)
+{
+ return 1;
+}
+
+/*
+ * Special-case data structure for "single bit set only" constant CPU masks.
+ *
+ * We pre-generate all the 64 (or 32) possible bit positions, with enough
+ * padding to the left and the right, and return the constant pointer
+ * appropriately offset.
+ */
+extern const unsigned long
+ cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
+
+static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
+{
+ const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
+ p -= cpu / BITS_PER_LONG;
+ return to_cpumask(p);
+}
+
+#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
+
+/*
+ * CPU_BITS_ALL - initializer for an NR_CPUS-bit bitmap with every bit set.
+ *
+ * The last word is masked with BITMAP_LAST_WORD_MASK() directly rather than
+ * through the CPU_MASK_LAST_WORD alias: that alias is only defined when
+ * CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS is not set, and this macro must
+ * keep working either way.
+ */
+#if NR_CPUS <= BITS_PER_LONG
+#define CPU_BITS_ALL \
+{ \
+ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \
+}
+
+#else /* NR_CPUS > BITS_PER_LONG */
+
+#define CPU_BITS_ALL \
+{ \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
+ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \
+}
+#endif /* NR_CPUS > BITS_PER_LONG */
+
+/*
+ *
+ * From here down, all obsolete. Use cpumask_ variants!
+ *
+ */
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
+/* These strip const, as traditionally they weren't const. */
+#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask)
+#define cpu_online_map (*(cpumask_t *)cpu_online_mask)
+#define cpu_present_map (*(cpumask_t *)cpu_present_mask)
+#define cpu_active_map (*(cpumask_t *)cpu_active_mask)
+
+#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu))
+
+#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
+
+#if NR_CPUS <= BITS_PER_LONG
+
+#define CPU_MASK_ALL \
+(cpumask_t) { { \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
+
+#else
+
+#define CPU_MASK_ALL \
+(cpumask_t) { { \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
+
+#endif
+
+#define CPU_MASK_NONE \
+(cpumask_t) { { \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \
+} }
+
+#define CPU_MASK_CPU0 \
+(cpumask_t) { { \
+ [0] = 1UL \
+} }
+
+#if NR_CPUS == 1
+#define first_cpu(src) ({ (void)(src); 0; })
+#define next_cpu(n, src) ({ (void)(src); 1; })
+#define any_online_cpu(mask) 0
+#define for_each_cpu_mask(cpu, mask) \
+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#else /* NR_CPUS > 1 */
+int __first_cpu(const cpumask_t *srcp);
+int __next_cpu(int n, const cpumask_t *srcp);
+int __any_online_cpu(const cpumask_t *mask);
+
+#define first_cpu(src) __first_cpu(&(src))
+#define next_cpu(n, src) __next_cpu((n), &(src))
+#define any_online_cpu(mask) __any_online_cpu(&(mask))
+#define for_each_cpu_mask(cpu, mask) \
+ for ((cpu) = -1; \
+ (cpu) = next_cpu((cpu), (mask)), \
+ (cpu) < NR_CPUS; )
+#endif /* NR_CPUS > 1 */
+
+#if NR_CPUS <= 64
+
+#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask)
+
+#else /* NR_CPUS > 64 */
+
+int __next_cpu_nr(int n, const cpumask_t *srcp);
+#define for_each_cpu_mask_nr(cpu, mask) \
+ for ((cpu) = -1; \
+ (cpu) = __next_cpu_nr((cpu), &(mask)), \
+ (cpu) < nr_cpu_ids; )
+
+#endif /* NR_CPUS > 64 */
+
+#define cpus_addr(src) ((src).bits)
+
+#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
+static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
+{
+ set_bit(cpu, dstp->bits);
+}
+
+#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
+static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
+{
+ clear_bit(cpu, dstp->bits);
+}
+
+#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
+static inline void __cpus_setall(cpumask_t *dstp, int nbits)
+{
+ bitmap_fill(dstp->bits, nbits);
+}
+
+#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
+static inline void __cpus_clear(cpumask_t *dstp, int nbits)
+{
+ bitmap_zero(dstp->bits, nbits);
+}
+
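+/*
+ * No static inline type checking: the 'type-checked' inline form of
+ * cpu_isset() made gcc (3.3.2, anyway) generate slightly worse code, so
+ * this stays a simple one-line #define.
+ */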
+#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
+
+#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
+static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
+{
+ return test_and_set_bit(cpu, addr->bits);
+}
+
+#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
+static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_andnot(dst, src1, src2) \
+ __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS)
+static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_equal(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_equal(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_intersects(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_intersects(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_subset(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_subset(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
+static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_empty(srcp->bits, nbits);
+}
+
+#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
+static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_weight(srcp->bits, nbits);
+}
+
+#define cpus_shift_left(dst, src, n) \
+ __cpus_shift_left(&(dst), &(src), (n), NR_CPUS)
+static inline void __cpus_shift_left(cpumask_t *dstp,
+ const cpumask_t *srcp, int n, int nbits)
+{
+ bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
+}
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
+
#endif /* __LINUX_CPUMASK_H */
diff --git a/include/linux/cred.h b/include/linux/cred.h
index fb371601a3b..4e3387a89cb 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -176,23 +176,7 @@ extern void __invalid_creds(const struct cred *, const char *, unsigned);
extern void __validate_process_creds(struct task_struct *,
const char *, unsigned);
-static inline bool creds_are_invalid(const struct cred *cred)
-{
- if (cred->magic != CRED_MAGIC)
- return true;
- if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
- return true;
-#ifdef CONFIG_SECURITY_SELINUX
- if (selinux_is_enabled()) {
- if ((unsigned long) cred->security < PAGE_SIZE)
- return true;
- if ((*(u32 *)cred->security & 0xffffff00) ==
- (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))
- return true;
- }
-#endif
- return false;
-}
+extern bool creds_are_invalid(const struct cred *cred);
static inline void __validate_creds(const struct cred *cred,
const char *file, unsigned line)
diff --git a/include/linux/dca.h b/include/linux/dca.h
index 9c20c7e87d0..d27a7a05718 100644
--- a/include/linux/dca.h
+++ b/include/linux/dca.h
@@ -20,6 +20,9 @@
*/
#ifndef DCA_H
#define DCA_H
+
+#include <linux/pci.h>
+
/* DCA Provider API */
/* DCA Notifier Interface */
@@ -36,6 +39,12 @@ struct dca_provider {
int id;
};
+struct dca_domain {
+ struct list_head node;
+ struct list_head dca_providers;
+ struct pci_bus *pci_rc;
+};
+
struct dca_ops {
int (*add_requester) (struct dca_provider *, struct device *);
int (*remove_requester) (struct dca_provider *, struct device *);
@@ -47,7 +56,7 @@ struct dca_ops {
struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
void free_dca_provider(struct dca_provider *dca);
int register_dca_provider(struct dca_provider *dca, struct device *dev);
-void unregister_dca_provider(struct dca_provider *dca);
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev);
static inline void *dca_priv(struct dca_provider *dca)
{
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index eb5c2ba2f81..fc1b930f246 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -9,7 +9,7 @@
* 2 as published by the Free Software Foundation.
*
* debugfs is for people to use instead of /proc or /sys.
- * See Documentation/DocBook/kernel-api for more details.
+ * See Documentation/DocBook/filesystems for more details.
*/
#ifndef _DEBUGFS_H_
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ffefba81c81..2b9f2ac7ed6 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -48,19 +48,20 @@ enum dma_status {
/**
* enum dma_transaction_type - DMA transaction types/indexes
+ *
+ * Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is
+ * automatically set as dma devices are registered.
*/
enum dma_transaction_type {
DMA_MEMCPY,
DMA_XOR,
- DMA_PQ_XOR,
- DMA_DUAL_XOR,
- DMA_PQ_UPDATE,
- DMA_ZERO_SUM,
- DMA_PQ_ZERO_SUM,
+ DMA_PQ,
+ DMA_XOR_VAL,
+ DMA_PQ_VAL,
DMA_MEMSET,
- DMA_MEMCPY_CRC32C,
DMA_INTERRUPT,
DMA_PRIVATE,
+ DMA_ASYNC_TX,
DMA_SLAVE,
};
@@ -70,18 +71,25 @@ enum dma_transaction_type {
/**
* enum dma_ctrl_flags - DMA flags to augment operation preparation,
- * control completion, and communicate status.
+ * control completion, and communicate status.
* @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
- * this transaction
+ * this transaction
* @DMA_CTRL_ACK - the descriptor cannot be reused until the client
- * acknowledges receipt, i.e. has has a chance to establish any
- * dependency chains
+ * acknowledges receipt, i.e. has had a chance to establish any dependency
+ * chains
* @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
* @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
* @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
* (if not set, do the source dma-unmapping as page)
* @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
* (if not set, do the destination dma-unmapping as page)
+ * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
+ * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
+ * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
+ * sources that were the result of a previous operation, in the case of a PQ
+ * operation it continues the calculation with new sources
+ * @DMA_PREP_FENCE - tell the driver that subsequent operations depend
+ * on the result of this operation
*/
enum dma_ctrl_flags {
DMA_PREP_INTERRUPT = (1 << 0),
@@ -90,9 +98,32 @@ enum dma_ctrl_flags {
DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
+ DMA_PREP_PQ_DISABLE_P = (1 << 6),
+ DMA_PREP_PQ_DISABLE_Q = (1 << 7),
+ DMA_PREP_CONTINUE = (1 << 8),
+ DMA_PREP_FENCE = (1 << 9),
};
/**
+ * enum sum_check_bits - bit positions used by enum sum_check_flags
+ */
+enum sum_check_bits {
+ SUM_CHECK_P = 0,
+ SUM_CHECK_Q = 1,
+};
+
+/**
+ * enum sum_check_flags - result of async_{xor,pq}_zero_sum operations
+ * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
+ * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
+ */
+enum sum_check_flags {
+ SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
+ SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
+};
+
+
+/**
* dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
* See linux/cpumask.h
*/
@@ -180,8 +211,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param);
* @flags: flags to augment operation preparation, control completion, and
* communicate status
* @phys: physical address of the descriptor
- * @tx_list: driver common field for operations that require multiple
- * descriptors
* @chan: target channel for this operation
* @tx_submit: set the prepared descriptor(s) to be executed by the engine
* @callback: routine to call after this operation is complete
@@ -195,7 +224,6 @@ struct dma_async_tx_descriptor {
dma_cookie_t cookie;
enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */
dma_addr_t phys;
- struct list_head tx_list;
struct dma_chan *chan;
dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
dma_async_tx_callback callback;
@@ -213,6 +241,11 @@ struct dma_async_tx_descriptor {
* @global_node: list_head for global dma_device_list
* @cap_mask: one or more dma_capability flags
* @max_xor: maximum number of xor sources, 0 if no capability
+ * @max_pq: maximum number of PQ sources and PQ-continue capability
+ * @copy_align: alignment shift for memcpy operations
+ * @xor_align: alignment shift for xor operations
+ * @pq_align: alignment shift for pq operations
+ * @fill_align: alignment shift for memset operations
* @dev_id: unique device ID
* @dev: struct device reference for dma mapping api
* @device_alloc_chan_resources: allocate resources and return the
@@ -220,7 +253,9 @@ struct dma_async_tx_descriptor {
* @device_free_chan_resources: release DMA channel's resources
* @device_prep_dma_memcpy: prepares a memcpy operation
* @device_prep_dma_xor: prepares a xor operation
- * @device_prep_dma_zero_sum: prepares a zero_sum operation
+ * @device_prep_dma_xor_val: prepares a xor validation operation
+ * @device_prep_dma_pq: prepares a pq operation
+ * @device_prep_dma_pq_val: prepares a pq validation (zero_sum) operation
* @device_prep_dma_memset: prepares a memset operation
* @device_prep_dma_interrupt: prepares an end of chain interrupt operation
* @device_prep_slave_sg: prepares a slave dma operation
@@ -235,7 +270,13 @@ struct dma_device {
struct list_head channels;
struct list_head global_node;
dma_cap_mask_t cap_mask;
- int max_xor;
+ unsigned short max_xor;
+ unsigned short max_pq;
+ u8 copy_align;
+ u8 xor_align;
+ u8 pq_align;
+ u8 fill_align;
+ #define DMA_HAS_PQ_CONTINUE (1 << 15)
int dev_id;
struct device *dev;
@@ -249,9 +290,17 @@ struct dma_device {
struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
unsigned int src_cnt, size_t len, unsigned long flags);
- struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
+ struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
- size_t len, u32 *result, unsigned long flags);
+ size_t len, enum sum_check_flags *result, unsigned long flags);
+ struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
+ struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, unsigned long flags);
+ struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
+ struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ enum sum_check_flags *pqres, unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
unsigned long flags);
@@ -270,6 +319,96 @@ struct dma_device {
void (*device_issue_pending)(struct dma_chan *chan);
};
+static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len) /* true when off1, off2 and len are all multiples of 1 << align */
+{
+ size_t mask;
+
+ if (!align) /* a zero shift imposes no alignment constraint */
+ return true;
+ mask = ((size_t)1 << align) - 1; /* shift in size_t: an int shift is undefined for align >= 31 */
+ if (mask & (off1 | off2 | len)) /* any low bit set in either offset or the length => misaligned */
+ return false;
+ return true;
+}
+
+static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1,
+ size_t off2, size_t len)
+{
+ return dmaengine_check_align(dev->copy_align, off1, off2, len);
+}
+
+static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1,
+ size_t off2, size_t len)
+{
+ return dmaengine_check_align(dev->xor_align, off1, off2, len);
+}
+
+static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
+ size_t off2, size_t len)
+{
+ return dmaengine_check_align(dev->pq_align, off1, off2, len);
+}
+
+static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
+ size_t off2, size_t len)
+{
+ return dmaengine_check_align(dev->fill_align, off1, off2, len);
+}
+
+static inline void
+dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
+{
+ dma->max_pq = maxpq;
+ if (has_pq_continue)
+ dma->max_pq |= DMA_HAS_PQ_CONTINUE;
+}
+
+static inline bool dmaf_continue(enum dma_ctrl_flags flags)
+{
+ return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
+}
+
+static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
+{
+ enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
+
+ return (flags & mask) == mask;
+}
+
+static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
+{
+ return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
+}
+
+static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma) /* inline: header-defined helper, avoids unused-function warnings in includers */
+{
+ return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; /* max_pq with the DMA_HAS_PQ_CONTINUE flag bit masked off */
+}
+
+/* dma_maxpq - reduce maxpq in the face of continued operations
+ * @dma - dma device with PQ capability
+ * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
+ *
+ * When an engine does not support native continuation we need 3 extra
+ * source slots to reuse P and Q with the following coefficients:
+ * 1/ {00} * P : remove P from Q', but use it as a source for P'
+ * 2/ {01} * Q : use Q to continue Q' calculation
+ * 3/ {00} * Q : subtract Q from P' to cancel (2)
+ *
+ * In the case where P is disabled we only need 1 extra source:
+ * 1/ {01} * Q : use Q to continue Q' calculation
+ */
+static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
+{
+ if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
+ return dma_dev_to_maxpq(dma);
+ else if (dmaf_p_disabled_continue(flags))
+ return dma_dev_to_maxpq(dma) - 1;
+ else if (dmaf_continue(flags))
+ return dma_dev_to_maxpq(dma) - 3;
+ BUG();
+}
+
/* --- public DMA engine API --- */
#ifdef CONFIG_DMA_ENGINE
@@ -299,7 +438,11 @@ static inline void net_dmaengine_put(void)
#ifdef CONFIG_ASYNC_TX_DMA
#define async_dmaengine_get() dmaengine_get()
#define async_dmaengine_put() dmaengine_put()
+#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
+#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX)
+#else
#define async_dma_find_channel(type) dma_find_channel(type)
+#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */
#else
static inline void async_dmaengine_get(void)
{
@@ -312,7 +455,7 @@ async_dma_find_channel(enum dma_transaction_type type)
{
return NULL;
}
-#endif
+#endif /* CONFIG_ASYNC_TX_DMA */
dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
void *dest, void *src, size_t len);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51803528b09..2adaa2529f1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -595,6 +595,7 @@ struct address_space_operations {
int (*launder_page) (struct page *);
int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
unsigned long);
+ int (*error_remove_page)(struct address_space *, struct page *);
};
/*
@@ -640,7 +641,6 @@ struct block_device {
struct super_block * bd_super;
int bd_openers;
struct mutex bd_mutex; /* open/close mutex */
- struct semaphore bd_mount_sem;
struct list_head bd_inodes;
void * bd_holder;
int bd_holders;
@@ -1315,7 +1315,7 @@ struct super_block {
unsigned long s_blocksize;
unsigned char s_blocksize_bits;
unsigned char s_dirt;
- unsigned long long s_maxbytes; /* Max file size */
+ loff_t s_maxbytes; /* Max file size */
struct file_system_type *s_type;
const struct super_operations *s_op;
const struct dquot_operations *dq_op;
@@ -2156,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t);
extern int inode_needs_sync(struct inode *inode);
extern void generic_delete_inode(struct inode *inode);
extern void generic_drop_inode(struct inode *inode);
+extern int generic_detach_inode(struct inode *inode);
extern struct inode *ilookup5_nowait(struct super_block *sb,
unsigned long hashval, int (*test)(struct inode *, void *),
@@ -2334,6 +2335,7 @@ extern void get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
extern struct super_block *get_super(struct block_device *);
+extern struct super_block *get_active_super(struct block_device *bdev);
extern struct super_block *user_get_super(dev_t);
extern void drop_super(struct super_block *sb);
@@ -2381,7 +2383,8 @@ extern int buffer_migrate_page(struct address_space *,
#define buffer_migrate_page NULL
#endif
-extern int inode_change_ok(struct inode *, struct iattr *);
+extern int inode_change_ok(const struct inode *, struct iattr *);
+extern int inode_newsize_ok(const struct inode *, loff_t offset);
extern int __must_check inode_setattr(struct inode *, struct iattr *);
extern void file_update_time(struct file *file);
@@ -2467,7 +2470,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos);
struct ctl_table;
-int proc_nr_files(struct ctl_table *table, int write, struct file *filp,
+int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
int __init get_filesystem_list(char *buf);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 3c0924a18da..cd3d2abaf30 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -19,7 +19,7 @@
extern int ftrace_enabled;
extern int
ftrace_enable_sysctl(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
@@ -94,7 +94,7 @@ static inline void ftrace_start(void) { }
extern int stack_tracer_enabled;
int
stack_trace_sysctl(struct ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
#endif
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 34956c8fdeb..8ec17997d94 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -4,11 +4,6 @@
#include <linux/compiler.h>
#include <linux/types.h>
-struct inode;
-struct mm_struct;
-struct task_struct;
-union ktime;
-
/* Second argument to futex syscall */
@@ -129,6 +124,11 @@ struct robust_list_head {
#define FUTEX_BITSET_MATCH_ANY 0xffffffff
#ifdef __KERNEL__
+struct inode;
+struct mm_struct;
+struct task_struct;
+union ktime;
+
long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 176e7ee73ef..11ab19ac6b3 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -20,9 +20,9 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
}
void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
-int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
-int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
-int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
+int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **,
diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h
new file mode 100644
index 00000000000..fc5db826b48
--- /dev/null
+++ b/include/linux/i2c/adp5588.h
@@ -0,0 +1,92 @@
+/*
+ * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _ADP5588_H
+#define _ADP5588_H
+
+#define DEV_ID 0x00 /* Device ID */
+#define CFG 0x01 /* Configuration Register1 */
+#define INT_STAT 0x02 /* Interrupt Status Register */
+#define KEY_LCK_EC_STAT 0x03 /* Key Lock and Event Counter Register */
+#define Key_EVENTA 0x04 /* Key Event Register A */
+#define Key_EVENTB 0x05 /* Key Event Register B */
+#define Key_EVENTC 0x06 /* Key Event Register C */
+#define Key_EVENTD 0x07 /* Key Event Register D */
+#define Key_EVENTE 0x08 /* Key Event Register E */
+#define Key_EVENTF 0x09 /* Key Event Register F */
+#define Key_EVENTG 0x0A /* Key Event Register G */
+#define Key_EVENTH 0x0B /* Key Event Register H */
+#define Key_EVENTI 0x0C /* Key Event Register I */
+#define Key_EVENTJ 0x0D /* Key Event Register J */
+#define KP_LCK_TMR 0x0E /* Keypad Lock1 to Lock2 Timer */
+#define UNLOCK1 0x0F /* Unlock Key1 */
+#define UNLOCK2 0x10 /* Unlock Key2 */
+#define GPIO_INT_STAT1 0x11 /* GPIO Interrupt Status */
+#define GPIO_INT_STAT2 0x12 /* GPIO Interrupt Status */
+#define GPIO_INT_STAT3 0x13 /* GPIO Interrupt Status */
+#define GPIO_DAT_STAT1 0x14 /* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_STAT2 0x15 /* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_STAT3 0x16 /* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_OUT1 0x17 /* GPIO DATA OUT */
+#define GPIO_DAT_OUT2 0x18 /* GPIO DATA OUT */
+#define GPIO_DAT_OUT3 0x19 /* GPIO DATA OUT */
+#define GPIO_INT_EN1 0x1A /* GPIO Interrupt Enable */
+#define GPIO_INT_EN2 0x1B /* GPIO Interrupt Enable */
+#define GPIO_INT_EN3 0x1C /* GPIO Interrupt Enable */
+#define KP_GPIO1 0x1D /* Keypad or GPIO Selection */
+#define KP_GPIO2 0x1E /* Keypad or GPIO Selection */
+#define KP_GPIO3 0x1F /* Keypad or GPIO Selection */
+#define GPI_EM1 0x20 /* GPI Event Mode 1 */
+#define GPI_EM2 0x21 /* GPI Event Mode 2 */
+#define GPI_EM3 0x22 /* GPI Event Mode 3 */
+#define GPIO_DIR1 0x23 /* GPIO Data Direction */
+#define GPIO_DIR2 0x24 /* GPIO Data Direction */
+#define GPIO_DIR3 0x25 /* GPIO Data Direction */
+#define GPIO_INT_LVL1 0x26 /* GPIO Edge/Level Detect */
+#define GPIO_INT_LVL2 0x27 /* GPIO Edge/Level Detect */
+#define GPIO_INT_LVL3 0x28 /* GPIO Edge/Level Detect */
+#define Debounce_DIS1 0x29 /* Debounce Disable */
+#define Debounce_DIS2 0x2A /* Debounce Disable */
+#define Debounce_DIS3 0x2B /* Debounce Disable */
+#define GPIO_PULL1 0x2C /* GPIO Pull Disable */
+#define GPIO_PULL2 0x2D /* GPIO Pull Disable */
+#define GPIO_PULL3 0x2E /* GPIO Pull Disable */
+#define CMP_CFG_STAT 0x30 /* Comparator Configuration and Status Register */
+#define CMP_CONFG_SENS1 0x31 /* Sensor1 Comparator Configuration Register */
+#define CMP_CONFG_SENS2 0x32 /* L2 Light Sensor Reference Level, Output Falling for Sensor 1; NOTE(review): for 0x32-0x36 the macro names and these descriptions disagree (names put the Sensor 2 config register at 0x32, descriptions at 0x36) - confirm against the ADP5588 datasheet */
+#define CMP1_LVL2_TRIP 0x33 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */
+#define CMP1_LVL2_HYS 0x34 /* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */
+#define CMP1_LVL3_TRIP 0x35 /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */
+#define CMP1_LVL3_HYS 0x36 /* Sensor 2 Comparator Configuration Register */
+#define CMP2_LVL2_TRIP 0x37 /* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */
+#define CMP2_LVL2_HYS 0x38 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */
+#define CMP2_LVL3_TRIP 0x39 /* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */
+#define CMP2_LVL3_HYS 0x3A /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */
+#define CMP1_ADC_DAT_R1 0x3B /* Comparator 1 ADC data Register1 */
+#define CMP1_ADC_DAT_R2 0x3C /* Comparator 1 ADC data Register2 */
+#define CMP2_ADC_DAT_R1 0x3D /* Comparator 2 ADC data Register1 */
+#define CMP2_ADC_DAT_R2 0x3E /* Comparator 2 ADC data Register2 */
+
+#define ADP5588_DEVICE_ID_MASK 0xF
+
+/* Put one of these structures in i2c_board_info platform_data */
+
+#define ADP5588_KEYMAPSIZE 80
+
+struct adp5588_kpad_platform_data {
+ int rows; /* Number of rows */
+ int cols; /* Number of columns */
+ const unsigned short *keymap; /* Pointer to keymap */
+ unsigned short keymapsize; /* Keymap size */
+ unsigned repeat:1; /* Enable key repeat */
+ unsigned en_keylock:1; /* Enable Key Lock feature */
+ unsigned short unlock_key1; /* Unlock Key 1 */
+ unsigned short unlock_key2; /* Unlock Key 2 */
+};
+
+#endif
diff --git a/include/linux/i2c/mcs5000_ts.h b/include/linux/i2c/mcs5000_ts.h
new file mode 100644
index 00000000000..5a117b5ca15
--- /dev/null
+++ b/include/linux/i2c/mcs5000_ts.h
@@ -0,0 +1,24 @@
+/*
+ * mcs5000_ts.h
+ *
+ * Copyright (C) 2009 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef __LINUX_MCS5000_TS_H
+#define __LINUX_MCS5000_TS_H
+
+/* platform data for the MELFAS MCS-5000 touchscreen driver */
+struct mcs5000_ts_platform_data {
+ void (*cfg_pin)(void);
+ int x_size;
+ int y_size;
+};
+
+#endif /* __LINUX_MCS5000_TS_H */
diff --git a/include/linux/i8042.h b/include/linux/i8042.h
index 7907a72403e..60c3360ef6a 100644
--- a/include/linux/i8042.h
+++ b/include/linux/i8042.h
@@ -7,6 +7,7 @@
* the Free Software Foundation.
*/
+#include <linux/types.h>
/*
* Standard commands.
@@ -30,6 +31,35 @@
#define I8042_CMD_MUX_PFX 0x0090
#define I8042_CMD_MUX_SEND 0x1090
+struct serio;
+
+#if defined(CONFIG_SERIO_I8042) || defined(CONFIG_SERIO_I8042_MODULE)
+
+void i8042_lock_chip(void);
+void i8042_unlock_chip(void);
int i8042_command(unsigned char *param, int command);
+bool i8042_check_port_owner(const struct serio *);
+
+#else
+
+static inline void i8042_lock_chip(void) /* no-op stub for the #else (i8042 not configured) branch; static inline so multiple includers do not clash at link time */
+{
+}
+
+static inline void i8042_unlock_chip(void) /* no-op stub for the #else (i8042 not configured) branch; static inline so multiple includers do not clash at link time */
+{
+}
+
+static inline int i8042_command(unsigned char *param, int command) /* stub for the #else (i8042 not configured) branch; static inline so multiple includers do not clash at link time */
+{
+ return -ENOSYS; /* always fails: param and command are ignored */
+}
+
+static inline bool i8042_check_port_owner(const struct serio *serio) /* stub for the #else (i8042 not configured) branch; static inline so multiple includers do not clash at link time */
+{
+ return false; /* always false: the serio argument is ignored */
+}
+
+#endif
#endif
diff --git a/include/linux/input.h b/include/linux/input.h
index 8b3bc3e0d14..0ccfc30cd40 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1123,7 +1123,7 @@ struct input_dev {
struct mutex mutex;
unsigned int users;
- int going_away;
+ bool going_away;
struct device dev;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 8e9e151f811..b78cf819495 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -84,7 +84,6 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
* struct irqaction - per interrupt action descriptor
* @handler: interrupt handler function
* @flags: flags (see IRQF_* above)
- * @mask: no comment as it is useless and about to be removed
* @name: name of the device
* @dev_id: cookie to identify the device
* @next: pointer to the next irqaction for shared interrupts
@@ -97,7 +96,6 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
struct irqaction {
irq_handler_t handler;
unsigned long flags;
- cpumask_t mask;
const char *name;
void *dev_id;
struct irqaction *next;
diff --git a/include/linux/libps2.h b/include/linux/libps2.h
index fcf5fbe6a50..79603a6c356 100644
--- a/include/linux/libps2.h
+++ b/include/linux/libps2.h
@@ -44,6 +44,8 @@ struct ps2dev {
void ps2_init(struct ps2dev *ps2dev, struct serio *serio);
int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout);
void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout);
+void ps2_begin_command(struct ps2dev *ps2dev);
+void ps2_end_command(struct ps2dev *ps2dev);
int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command);
int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command);
int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data);
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 691f59171c6..5126cceb6ae 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -57,6 +57,7 @@
#ifdef __ASSEMBLY__
+#ifndef LINKER_SCRIPT
#define ALIGN __ALIGN
#define ALIGN_STR __ALIGN_STR
@@ -66,6 +67,7 @@
ALIGN; \
name:
#endif
+#endif /* LINKER_SCRIPT */
#ifndef WEAK
#define WEAK(name) \
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e46a0734ab6..bf9213b2db8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -118,6 +118,9 @@ static inline bool mem_cgroup_disabled(void)
extern bool mem_cgroup_oom_called(struct task_struct *task);
void mem_cgroup_update_mapped_file_stat(struct page *page, int val);
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask, int nid,
+ int zid);
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
struct mem_cgroup;
@@ -276,6 +279,13 @@ static inline void mem_cgroup_update_mapped_file_stat(struct page *page,
{
}
+static inline
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask, int nid, int zid)
+{
+ return 0;
+}
+
#endif /* CONFIG_CGROUP_MEM_CONT */
#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b6eae5e3144..df08551cb0a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -695,11 +695,12 @@ static inline int page_mapped(struct page *page)
#define VM_FAULT_SIGBUS 0x0002
#define VM_FAULT_MAJOR 0x0004
#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */
+#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */
#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */
#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */
-#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS)
+#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)
/*
* Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
@@ -791,8 +792,14 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
unmap_mapping_range(mapping, holebegin, holelen, 0);
}
-extern int vmtruncate(struct inode * inode, loff_t offset);
-extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
+extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
+extern int vmtruncate(struct inode *inode, loff_t offset);
+extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
+
+int truncate_inode_page(struct address_space *mapping, struct page *page);
+int generic_error_remove_page(struct address_space *mapping, struct page *page);
+
+int invalidate_inode_page(struct page *page);
#ifdef CONFIG_MMU
extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -1279,7 +1286,7 @@ int in_gate_area_no_task(unsigned long addr);
#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
#endif /* __HAVE_ARCH_GATE_AREA */
-int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
+int drop_caches_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
unsigned long lru_pages);
@@ -1308,5 +1315,12 @@ void vmemmap_populate_print_last(void);
extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
size_t size);
extern void refund_locked_memory(struct mm_struct *mm, size_t size);
+
+extern void memory_failure(unsigned long pfn, int trapno);
+extern int __memory_failure(unsigned long pfn, int trapno, int ref);
+extern int sysctl_memory_failure_early_kill;
+extern int sysctl_memory_failure_recovery;
+extern atomic_long_t mce_bad_pages;
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0042090a4d7..21d6aa45206 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -240,6 +240,8 @@ struct mm_struct {
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
+ struct linux_binfmt *binfmt;
+
cpumask_t cpu_vm_mask;
/* Architecture-specific MM context */
@@ -259,11 +261,10 @@ struct mm_struct {
unsigned long flags; /* Must use atomic bitops to access the bits */
struct core_state *core_state; /* coredumping support */
-
- /* aio bits */
+#ifdef CONFIG_AIO
spinlock_t ioctx_lock;
struct hlist_head ioctx_list;
-
+#endif
#ifdef CONFIG_MM_OWNER
/*
* "owner" points to a task that is regarded as the canonical
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 652ef01be58..6f7561730d8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -755,21 +755,20 @@ static inline int is_dma(struct zone *zone)
/* These two functions are used to setup the per zone pages min values */
struct ctl_table;
-struct file;
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *,
+int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
+int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
+int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
- struct file *, void __user *, size_t *, loff_t *);
+ void __user *, size_t *, loff_t *);
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
- struct file *, void __user *, size_t *, loff_t *);
+ void __user *, size_t *, loff_t *);
extern int numa_zonelist_order_handler(struct ctl_table *, int,
- struct file *, void __user *, size_t *, loff_t *);
+ void __user *, size_t *, loff_t *);
extern char numa_zonelist_order[];
#define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */
diff --git a/include/linux/module.h b/include/linux/module.h
index 1c755b2f937..482efc865ac 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -128,7 +128,10 @@ extern struct module __this_module;
*/
#define MODULE_LICENSE(_license) MODULE_INFO(license, _license)
-/* Author, ideally of form NAME[, NAME]*[ and NAME] */
+/*
+ * Author(s), use "Name <email>" or just "Name", for multiple
+ * authors use multiple MODULE_AUTHOR() statements/lines.
+ */
#define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)
/* What your module does. */
@@ -308,10 +311,14 @@ struct module
#endif
#ifdef CONFIG_KALLSYMS
- /* We keep the symbol and string tables for kallsyms. */
- Elf_Sym *symtab;
- unsigned int num_symtab;
- char *strtab;
+ /*
+ * We keep the symbol and string tables for kallsyms.
+ * The core_* fields below are temporary, loader-only (they
+ * could really be discarded after module init).
+ */
+ Elf_Sym *symtab, *core_symtab;
+ unsigned int num_symtab, core_num_syms;
+ char *strtab, *core_strtab;
/* Section attributes */
struct module_sect_attrs *sect_attrs;
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 080f6ba9e73..ab5d3126831 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -187,6 +187,7 @@ extern struct sock *netlink_kernel_create(struct net *net,
extern void netlink_kernel_release(struct sock *sk);
extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
+extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group);
extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
extern int netlink_has_listeners(struct sock *sk, unsigned int group);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 13de789f0a5..6b202b17395 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -51,6 +51,9 @@
* PG_buddy is set to indicate that the page is free and in the buddy system
* (see mm/page_alloc.c).
*
+ * PG_hwpoison indicates that a page got corrupted in hardware and contains
+ * data with incorrect ECC bits that triggered a machine check. Accessing is
+ * not safe since it may cause another machine check. Don't touch!
*/
/*
@@ -102,6 +105,9 @@ enum pageflags {
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
PG_uncached, /* Page has been mapped as uncached */
#endif
+#ifdef CONFIG_MEMORY_FAILURE
+ PG_hwpoison, /* hardware poisoned page. Don't touch */
+#endif
__NR_PAGEFLAGS,
/* Filesystems */
@@ -269,6 +275,15 @@ PAGEFLAG(Uncached, uncached)
PAGEFLAG_FALSE(Uncached)
#endif
+#ifdef CONFIG_MEMORY_FAILURE
+PAGEFLAG(HWPoison, hwpoison)
+TESTSETFLAG(HWPoison, hwpoison)
+#define __PG_HWPOISON (1UL << PG_hwpoison)
+#else
+PAGEFLAG_FALSE(HWPoison)
+#define __PG_HWPOISON 0
+#endif
+
static inline int PageUptodate(struct page *page)
{
int ret = test_bit(PG_uptodate, &(page)->flags);
@@ -393,7 +408,7 @@ static inline void __ClearPageTail(struct page *page)
1 << PG_private | 1 << PG_private_2 | \
1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
- 1 << PG_unevictable | __PG_MLOCKED)
+ 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON)
/*
* Flags checked when a page is prepped for return by the page allocator.
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index ada779f2417..4b938d4f3ac 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -38,6 +38,7 @@ enum {
PCG_LOCK, /* page cgroup is locked */
PCG_CACHE, /* charged as cache */
PCG_USED, /* this object is in use. */
+ PCG_ACCT_LRU, /* page has been accounted for */
};
#define TESTPCGFLAG(uname, lname) \
@@ -52,11 +53,23 @@ static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
{ clear_bit(PCG_##lname, &pc->flags); }
+#define TESTCLEARPCGFLAG(uname, lname) \
+static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \
+ { return test_and_clear_bit(PCG_##lname, &pc->flags); }
+
/* Cache flag is set only once (at allocation) */
TESTPCGFLAG(Cache, CACHE)
+CLEARPCGFLAG(Cache, CACHE)
+SETPCGFLAG(Cache, CACHE)
TESTPCGFLAG(Used, USED)
CLEARPCGFLAG(Used, USED)
+SETPCGFLAG(Used, USED)
+
+SETPCGFLAG(AcctLRU, ACCT_LRU)
+CLEARPCGFLAG(AcctLRU, ACCT_LRU)
+TESTPCGFLAG(AcctLRU, ACCT_LRU)
+TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
static inline int page_cgroup_nid(struct page_cgroup *pc)
{
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 7803565aa87..da1fda8623e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2527,6 +2527,16 @@
#define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e
#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b
#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719
#define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14
#define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16
#define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18
diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index 1ef5a078183..e5126cff9b2 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -38,6 +38,7 @@
#define PNPIPE_IFINDEX 2
#define PNADDR_ANY 0
+#define PNADDR_BROADCAST 0xFC
#define PNPORT_RESOURCE_ROUTING 0
/* Values for PNPIPE_ENCAP option */
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 07bff666e65..931150566ad 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -88,4 +88,6 @@
#define PR_TASK_PERF_EVENTS_DISABLE 31
#define PR_TASK_PERF_EVENTS_ENABLE 32
+#define PR_MCE_KILL 33
+
#endif /* _LINUX_PRCTL_H */
diff --git a/include/linux/relay.h b/include/linux/relay.h
index 953fc055e87..14a86bc7102 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -140,7 +140,7 @@ struct rchan_callbacks
* cause relay_open() to create a single global buffer rather
* than the default set of per-cpu buffers.
*
- * See Documentation/filesystems/relayfs.txt for more info.
+ * See Documentation/filesystems/relay.txt for more info.
*/
struct dentry *(*create_buf_file)(const char *filename,
struct dentry *parent,
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 511f42fc681..731af71cddc 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -35,6 +35,10 @@ struct res_counter {
*/
unsigned long long limit;
/*
+ * the limit that usage can exceed
+ */
+ unsigned long long soft_limit;
+ /*
* the number of unsuccessful attempts to consume the resource
*/
unsigned long long failcnt;
@@ -87,6 +91,7 @@ enum {
RES_MAX_USAGE,
RES_LIMIT,
RES_FAILCNT,
+ RES_SOFT_LIMIT,
};
/*
@@ -109,7 +114,8 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
int __must_check res_counter_charge_locked(struct res_counter *counter,
unsigned long val);
int __must_check res_counter_charge(struct res_counter *counter,
- unsigned long val, struct res_counter **limit_fail_at);
+ unsigned long val, struct res_counter **limit_fail_at,
+ struct res_counter **soft_limit_at);
/*
* uncharge - tell that some portion of the resource is released
@@ -122,7 +128,8 @@ int __must_check res_counter_charge(struct res_counter *counter,
*/
void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val);
+void res_counter_uncharge(struct res_counter *counter, unsigned long val,
+ bool *was_soft_limit_excess);
static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
{
@@ -132,6 +139,36 @@ static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
return false;
}
+static inline bool res_counter_soft_limit_check_locked(struct res_counter *cnt)
+{
+ if (cnt->usage < cnt->soft_limit)
+ return true;
+
+ return false;
+}
+
+/**
+ * res_counter_soft_limit_excess - get the difference between the usage and the soft limit
+ * @cnt: The counter
+ *
+ * Returns 0 if usage is less than or equal to soft limit
+ * The difference between usage and soft limit, otherwise.
+ */
+static inline unsigned long long
+res_counter_soft_limit_excess(struct res_counter *cnt)
+{
+ unsigned long long excess;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ if (cnt->usage <= cnt->soft_limit)
+ excess = 0;
+ else
+ excess = cnt->usage - cnt->soft_limit;
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return excess;
+}
+
/*
* Helper function to detect if the cgroup is within it's limit or
* not. It's currently called from cgroup_rss_prepare()
@@ -147,6 +184,17 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt)
return ret;
}
+static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt)
+{
+ bool ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ ret = res_counter_soft_limit_check_locked(cnt);
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return ret;
+}
+
static inline void res_counter_reset_max(struct res_counter *cnt)
{
unsigned long flags;
@@ -180,4 +228,16 @@ static inline int res_counter_set_limit(struct res_counter *cnt,
return ret;
}
+static inline int
+res_counter_set_soft_limit(struct res_counter *cnt,
+ unsigned long long soft_limit)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ cnt->soft_limit = soft_limit;
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return 0;
+}
+
#endif
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 477841d29fc..cb0ba703260 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -81,7 +81,19 @@ static inline void page_dup_rmap(struct page *page)
*/
int page_referenced(struct page *, int is_locked,
struct mem_cgroup *cnt, unsigned long *vm_flags);
-int try_to_unmap(struct page *, int ignore_refs);
+enum ttu_flags {
+ TTU_UNMAP = 0, /* unmap mode */
+ TTU_MIGRATION = 1, /* migration mode */
+ TTU_MUNLOCK = 2, /* munlock mode */
+ TTU_ACTION_MASK = 0xff,
+
+ TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */
+ TTU_IGNORE_ACCESS = (1 << 9), /* don't age */
+ TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
+};
+#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
+
+int try_to_unmap(struct page *, enum ttu_flags flags);
/*
* Called from mm/filemap_xip.c to unmap empty zero page
@@ -108,6 +120,13 @@ int page_mkclean(struct page *);
*/
int try_to_munlock(struct page *);
+/*
+ * Called by memory-failure.c to kill processes.
+ */
+struct anon_vma *page_lock_anon_vma(struct page *page);
+void page_unlock_anon_vma(struct anon_vma *anon_vma);
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
+
#else /* !CONFIG_MMU */
#define anon_vma_init() do {} while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cbf2a3b4628..75e6e60bf58 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -309,7 +309,7 @@ extern void softlockup_tick(void);
extern void touch_softlockup_watchdog(void);
extern void touch_all_softlockup_watchdogs(void);
extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic;
extern int softlockup_thresh;
@@ -331,7 +331,7 @@ extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos);
#endif
@@ -1271,7 +1271,6 @@ struct task_struct {
struct mm_struct *mm, *active_mm;
/* task state */
- struct linux_binfmt *binfmt;
int exit_state;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
@@ -1735,6 +1734,7 @@ extern cputime_t task_gtime(struct task_struct *p);
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
+#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
#define PF_DUMPCORE 0x00000200 /* dumped core */
#define PF_SIGNALED 0x00000400 /* killed by a signal */
@@ -1754,6 +1754,7 @@ extern cputime_t task_gtime(struct task_struct *p);
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
+#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */
@@ -1817,10 +1818,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
return 0;
}
#endif
+
+#ifndef CONFIG_CPUMASK_OFFSTACK
static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
{
return set_cpus_allowed_ptr(p, &new_mask);
}
+#endif
/*
* Architectures can set this to 1 if they have specified
@@ -1903,7 +1907,7 @@ extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_timer_migration;
int sched_nr_latency_handler(struct ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length,
+ void __user *buffer, size_t *length,
loff_t *ppos);
#endif
#ifdef CONFIG_SCHED_DEBUG
@@ -1921,7 +1925,7 @@ extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;
int sched_rt_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
extern unsigned int sysctl_sched_compat_yield;
@@ -2056,6 +2060,7 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv);
extern int kill_pid(struct pid *pid, int sig, int priv);
extern int kill_proc_info(int, struct siginfo *, pid_t);
extern int do_notify_parent(struct task_struct *, int);
+extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
extern void force_sig(int, struct task_struct *);
extern void force_sig_specific(int, struct task_struct *);
extern int send_sig(int, struct task_struct *, int);
@@ -2333,7 +2338,10 @@ static inline int signal_pending(struct task_struct *p)
return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}
-extern int __fatal_signal_pending(struct task_struct *p);
+static inline int __fatal_signal_pending(struct task_struct *p)
+{
+ return unlikely(sigismember(&p->pending.signal, SIGKILL));
+}
static inline int fatal_signal_pending(struct task_struct *p)
{
diff --git a/include/linux/security.h b/include/linux/security.h
index d050b66ab9e..239e40d0450 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -133,7 +133,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
return PAGE_ALIGN(mmap_min_addr);
return hint;
}
-extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp,
+extern int mmap_min_addr_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#ifdef CONFIG_SECURITY
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 0c6a86b7959..8366d8f12e5 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -35,6 +35,44 @@ struct seq_operations {
#define SEQ_SKIP 1
+/**
+ * seq_get_buf - get buffer to write arbitrary data to
+ * @m: the seq_file handle
+ * @bufp: the beginning of the buffer is stored here
+ *
+ * Return the number of bytes available in the buffer, or zero if
+ * there's no space.
+ */
+static inline size_t seq_get_buf(struct seq_file *m, char **bufp)
+{
+ BUG_ON(m->count > m->size);
+ if (m->count < m->size)
+ *bufp = m->buf + m->count;
+ else
+ *bufp = NULL;
+
+ return m->size - m->count;
+}
+
+/**
+ * seq_commit - commit data to the buffer
+ * @m: the seq_file handle
+ * @num: the number of bytes to commit
+ *
+ * Commit @num bytes of data written to a buffer previously acquired
+ * by seq_get_buf. To signal an error condition, or that the data
+ * didn't fit in the available space, pass a negative @num value.
+ */
+static inline void seq_commit(struct seq_file *m, int num)
+{
+ if (num < 0) {
+ m->count = m->size;
+ } else {
+ BUG_ON(m->count + num > m->size);
+ m->count += num;
+ }
+}
+
char *mangle_path(char *s, char *p, char *esc);
int seq_open(struct file *, const struct seq_operations *);
ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/signal.h b/include/linux/signal.h
index c7552836bd9..ab9272cc270 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -233,6 +233,8 @@ static inline int valid_signal(unsigned long sig)
}
extern int next_signal(struct sigpending *pending, sigset_t *mask);
+extern int do_send_sig_info(int sig, struct siginfo *info,
+ struct task_struct *p, bool group);
extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 9e3d8af0920..39c64bae776 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -73,15 +73,6 @@ int smp_call_function(void(*func)(void *info), void *info, int wait);
void smp_call_function_many(const struct cpumask *mask,
void (*func)(void *info), void *info, bool wait);
-/* Deprecated: Use smp_call_function_many which takes a pointer to the mask. */
-static inline int
-smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
- int wait)
-{
- smp_call_function_many(&mask, func, info, wait);
- return 0;
-}
-
void __smp_call_function_single(int cpuid, struct call_single_data *data,
int wait);
@@ -144,8 +135,6 @@ static inline int up_smp_call_function(void (*func)(void *), void *info)
static inline void smp_send_reschedule(int cpu) { }
#define num_booting_cpus() 1
#define smp_prepare_boot_cpu() do {} while (0)
-#define smp_call_function_mask(mask, func, info, wait) \
- (up_smp_call_function(func, info))
#define smp_call_function_many(mask, func, info, wait) \
(up_smp_call_function(func, info))
static inline void init_call_single_data(void)
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 7da466ba4b0..f5cc0898bc5 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -11,6 +11,7 @@
#include <linux/uio.h>
#include <asm/byteorder.h>
+#include <asm/unaligned.h>
#include <linux/scatterlist.h>
/*
@@ -117,14 +118,14 @@ static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int le
static inline __be32 *
xdr_encode_hyper(__be32 *p, __u64 val)
{
- *(__be64 *)p = cpu_to_be64(val);
+ put_unaligned_be64(val, p);
return p + 2;
}
static inline __be32 *
xdr_decode_hyper(__be32 *p, __u64 *valp)
{
- *valp = be64_to_cpup((__be64 *)p);
+ *valp = get_unaligned_be64(p);
return p + 2;
}
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 6c990e658f4..4ec90019c1a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -34,16 +34,38 @@ static inline int current_is_kswapd(void)
* the type/offset into the pte as 5/27 as well.
*/
#define MAX_SWAPFILES_SHIFT 5
-#ifndef CONFIG_MIGRATION
-#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT)
+
+/*
+ * Use some of the swap files numbers for other purposes. This
+ * is a convenient way to hook into the VM to trigger special
+ * actions on faults.
+ */
+
+/*
+ * NUMA node memory migration support
+ */
+#ifdef CONFIG_MIGRATION
+#define SWP_MIGRATION_NUM 2
+#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM)
+#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)
#else
-/* Use last two entries for page migration swap entries */
-#define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2)
-#define SWP_MIGRATION_READ MAX_SWAPFILES
-#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1)
+#define SWP_MIGRATION_NUM 0
#endif
/*
+ * Handling of hardware poisoned pages with memory corruption.
+ */
+#ifdef CONFIG_MEMORY_FAILURE
+#define SWP_HWPOISON_NUM 1
+#define SWP_HWPOISON MAX_SWAPFILES
+#else
+#define SWP_HWPOISON_NUM 0
+#endif
+
+#define MAX_SWAPFILES \
+ ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
+
+/*
* Magic header for a swap area. The first part of the union is
* what the swap magic looks like for the old (limited to 128MB)
* swap area format, the second part of the union adds - in the
@@ -217,6 +239,11 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
unsigned int swappiness);
+extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+ gfp_t gfp_mask, bool noswap,
+ unsigned int swappiness,
+ struct zone *zone,
+ int nid);
extern int __isolate_lru_page(struct page *page, int mode, int file);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
@@ -240,7 +267,7 @@ extern int page_evictable(struct page *page, struct vm_area_struct *vma);
extern void scan_mapping_unevictable_pages(struct address_space *);
extern unsigned long scan_unevictable_pages;
-extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
+extern int scan_unevictable_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int scan_unevictable_register_node(struct node *node);
extern void scan_unevictable_unregister_node(struct node *node);
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 6ec39ab27b4..cd42e30b7c6 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry)
#endif
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+ return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_HWPOISON;
+}
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+ return 0;
+}
+#endif
+
+#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION)
+static inline int non_swap_entry(swp_entry_t entry)
+{
+ return swp_type(entry) >= MAX_SWAPFILES;
+}
+#else
+static inline int non_swap_entry(swp_entry_t entry)
+{
+ return 0;
+}
+#endif
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index e76d3b22a46..1e4743ee683 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -29,7 +29,6 @@
#include <linux/types.h>
#include <linux/compiler.h>
-struct file;
struct completion;
#define CTL_MAXNAME 10 /* how many path components do we allow in a
@@ -977,25 +976,25 @@ typedef int ctl_handler (struct ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen);
-typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp,
+typedef int proc_handler (struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-extern int proc_dostring(struct ctl_table *, int, struct file *,
+extern int proc_dostring(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_dointvec(struct ctl_table *, int, struct file *,
+extern int proc_dointvec(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_minmax(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_jiffies(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_ms_jiffies(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_doulongvec_minmax(struct ctl_table *, int, struct file *,
+extern int proc_doulongvec_minmax(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
- struct file *, void __user *, size_t *, loff_t *);
+ void __user *, size_t *, loff_t *);
extern int do_sysctl (int __user *name, int nlen,
void __user *oldval, size_t __user *oldlenp,
diff --git a/include/linux/time.h b/include/linux/time.h
index 56787c09334..fe04e5ef6a5 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -155,6 +155,34 @@ extern void timekeeping_leap_insert(int leapsecond);
struct tms;
extern void do_sys_times(struct tms *);
+/*
+ * Similar to the struct tm in userspace <time.h>, but it needs to be here so
+ * that the kernel source is self contained.
+ */
+struct tm {
+ /*
+ * the number of seconds after the minute, normally in the range
+ * 0 to 59, but can be up to 60 to allow for leap seconds
+ */
+ int tm_sec;
+ /* the number of minutes after the hour, in the range 0 to 59*/
+ int tm_min;
+ /* the number of hours past midnight, in the range 0 to 23 */
+ int tm_hour;
+ /* the day of the month, in the range 1 to 31 */
+ int tm_mday;
+ /* the number of months since January, in the range 0 to 11 */
+ int tm_mon;
+ /* the number of years since 1900 */
+ long tm_year;
+ /* the number of days since Sunday, in the range 0 to 6 */
+ int tm_wday;
+ /* the number of days since January 1, in the range 0 to 365 */
+ int tm_yday;
+};
+
+void time_to_tm(time_t totalsecs, int offset, struct tm *result);
+
/**
* timespec_to_ns - Convert timespec to nanoseconds
* @ts: pointer to the timespec variable to be converted
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 809b26c0709..fc0bf3edeb6 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -211,12 +211,6 @@ int arch_update_cpu_topology(void);
#ifndef topology_core_id
#define topology_core_id(cpu) ((void)(cpu), 0)
#endif
-#ifndef topology_thread_siblings
-#define topology_thread_siblings(cpu) cpumask_of_cpu(cpu)
-#endif
-#ifndef topology_core_siblings
-#define topology_core_siblings(cpu) cpumask_of_cpu(cpu)
-#endif
#ifndef topology_thread_cpumask
#define topology_thread_cpumask(cpu) cpumask_of(cpu)
#endif
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 17ba82efa48..1eb44a924e5 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -1,7 +1,7 @@
/*
* Tracing hooks
*
- * Copyright (C) 2008 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
@@ -463,22 +463,38 @@ static inline int tracehook_get_signal(struct task_struct *task,
/**
* tracehook_notify_jctl - report about job control stop/continue
- * @notify: nonzero if this is the last thread in the group to stop
+ * @notify: zero, %CLD_STOPPED or %CLD_CONTINUED
* @why: %CLD_STOPPED or %CLD_CONTINUED
*
* This is called when we might call do_notify_parent_cldstop().
- * It's called when about to stop for job control; we are already in
- * %TASK_STOPPED state, about to call schedule(). It's also called when
- * a delayed %CLD_STOPPED or %CLD_CONTINUED report is ready to be made.
*
- * Return nonzero to generate a %SIGCHLD with @why, which is
- * normal if @notify is nonzero.
+ * @notify is zero if we would not ordinarily send a %SIGCHLD,
+ * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD.
*
- * Called with no locks held.
+ * @why is %CLD_STOPPED when about to stop for job control;
+ * we are already in %TASK_STOPPED state, about to call schedule().
+ * It might also be that we have just exited (check %PF_EXITING),
+ * but need to report that a group-wide stop is complete.
+ *
+ * @why is %CLD_CONTINUED when waking up after job control stop and
+ * ready to make a delayed @notify report.
+ *
+ * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal.
+ *
+ * Called with the siglock held.
*/
static inline int tracehook_notify_jctl(int notify, int why)
{
- return notify || (current->ptrace & PT_PTRACED);
+ return notify ?: (current->ptrace & PT_PTRACED) ? why : 0;
+}
+
+/**
+ * tracehook_finish_jctl - report about return from job control stop
+ *
+ * This is called by do_signal_stop() after wakeup.
+ */
+static inline void tracehook_finish_jctl(void)
+{
}
#define DEATH_REAP -1
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 63a3f7a8058..660a9de96f8 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -4,7 +4,7 @@
/*
* Kernel Tracepoint API.
*
- * See Documentation/tracepoint.txt.
+ * See Documentation/trace/tracepoints.txt.
*
* (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
*
diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h
index 46dd12c5709..9356b24223a 100644
--- a/include/linux/unaligned/be_byteshift.h
+++ b/include/linux/unaligned/be_byteshift.h
@@ -1,7 +1,7 @@
#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H
#define _LINUX_UNALIGNED_BE_BYTESHIFT_H
-#include <linux/kernel.h>
+#include <linux/types.h>
static inline u16 __get_unaligned_be16(const u8 *p)
{
diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h
index 59777e951ba..be376fb79b6 100644
--- a/include/linux/unaligned/le_byteshift.h
+++ b/include/linux/unaligned/le_byteshift.h
@@ -1,7 +1,7 @@
#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H
#define _LINUX_UNALIGNED_LE_BYTESHIFT_H
-#include <linux/kernel.h>
+#include <linux/types.h>
static inline u16 __get_unaligned_le16(const u8 *p)
{
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index bb69e256cd1..f8147305205 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -89,6 +89,7 @@ struct driver_info {
#define FLAG_FRAMING_AX 0x0040 /* AX88772/178 packets */
#define FLAG_WLAN 0x0080 /* use "wlan%d" names */
#define FLAG_AVOID_UNLINK_URBS 0x0100 /* don't unlink urbs at usbnet_stop() */
+#define FLAG_SEND_ZLP 0x0200 /* hw requires ZLPs are sent */
/* init device ... can sleep, or cause probe() failure */
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 3656b300de3..69f39974c04 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -36,7 +36,6 @@ struct new_utsname {
#include <linux/kref.h>
#include <linux/nsproxy.h>
#include <linux/err.h>
-#include <asm/atomic.h>
struct uts_namespace {
struct kref kref;
diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index 923f9040ea2..2dfaa293ae8 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -1,5 +1,6 @@
/*
- * vgaarb.c
+ * The VGA arbiter manages VGA space routing and VGA resource decode to
+ * allow multiple VGA devices to be used in a system in a safe way.
*
* (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
* (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com>
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 75cf58666ff..66ebddcff66 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -110,21 +110,20 @@ extern int laptop_mode;
extern unsigned long determine_dirtyable_memory(void);
extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int dirty_ratio_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int dirty_bytes_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
struct ctl_table;
-struct file;
-int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
+int dirty_writeback_centisecs_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index b77c1478c99..a7fb54808a2 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -38,6 +38,8 @@
* @P9_DEBUG_SLABS: memory management tracing
* @P9_DEBUG_FCALL: verbose dump of protocol messages
* @P9_DEBUG_FID: fid allocation/deallocation tracking
+ * @P9_DEBUG_PKT: packet marshalling/unmarshalling
+ * @P9_DEBUG_FSC: FS-cache tracing
*
* These flags are passed at mount time to turn on various levels of
* verbosity and tracing which will be output to the system logs.
@@ -54,6 +56,7 @@ enum p9_debug_flags {
P9_DEBUG_FCALL = (1<<8),
P9_DEBUG_FID = (1<<9),
P9_DEBUG_PKT = (1<<10),
+ P9_DEBUG_FSC = (1<<11),
};
#ifdef CONFIG_NET_9P_DEBUG
diff --git a/include/net/ip.h b/include/net/ip.h
index 72c36926c26..5b26a0bd178 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -399,7 +399,7 @@ extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
* fed into the routing cache should use these handlers.
*/
int ipv4_doint_and_flush(ctl_table *ctl, int write,
- struct file* filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos);
int ipv4_doint_and_flush_strategy(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
diff --git a/include/net/ipip.h b/include/net/ipip.h
index 5d3036fa151..76e3ea6e2fe 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -12,7 +12,6 @@ struct ip_tunnel
struct ip_tunnel *next;
struct net_device *dev;
- int recursion; /* Depth of hard_start_xmit recursion */
int err_count; /* Number of arrived ICMP errors */
unsigned long err_time; /* Time when the last ICMP error arrived */
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 1459ed3e269..f76f22d0572 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -55,7 +55,6 @@ enum {
#include <net/neighbour.h>
struct ctl_table;
-struct file;
struct inet6_dev;
struct net_device;
struct net_proto_family;
@@ -139,7 +138,6 @@ extern int igmp6_event_report(struct sk_buff *skb);
#ifdef CONFIG_SYSCTL
extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl,
int write,
- struct file * filp,
void __user *buffer,
size_t *lenp,
loff_t *ppos);
diff --git a/init/Kconfig b/init/Kconfig
index 0aa6579504c..c7bac39d6c6 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1006,14 +1006,6 @@ config SLUB_DEBUG
SLUB sysfs support. /sys/slab will not exist and there will be
no support for cache validation etc.
-config STRIP_ASM_SYMS
- bool "Strip assembler-generated symbols during link"
- default n
- help
- Strip internal assembler-generated symbols during a link (symbols
- that look like '.Lxxx') so they don't pollute the output of
- get_wchan() and suchlike.
-
config COMPAT_BRK
bool "Disable heap randomization"
default y
diff --git a/init/main.c b/init/main.c
index 6107223124e..7449819a480 100644
--- a/init/main.c
+++ b/init/main.c
@@ -18,7 +18,6 @@
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/delay.h>
-#include <linux/utsname.h>
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
@@ -360,11 +359,6 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
#else
-#if NR_CPUS > BITS_PER_LONG
-cpumask_t cpu_mask_all __read_mostly = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_mask_all);
-#endif
-
/* Setup number of possible processor ids */
int nr_cpu_ids __read_mostly = NR_CPUS;
EXPORT_SYMBOL(nr_cpu_ids);
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 40eab7314ae..7d3704750ef 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -27,18 +27,18 @@ static void *get_ipc(ctl_table *table)
}
#ifdef CONFIG_PROC_SYSCTL
-static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
+static int proc_ipc_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
- return proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
+ return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
}
static int proc_ipc_callback_dointvec(ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
size_t lenp_bef = *lenp;
@@ -47,7 +47,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
- rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
+ rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
if (write && !rc && lenp_bef == *lenp)
/*
@@ -61,13 +61,13 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
}
static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
- return proc_doulongvec_minmax(&ipc_table, write, filp, buffer,
+ return proc_doulongvec_minmax(&ipc_table, write, buffer,
lenp, ppos);
}
@@ -95,7 +95,7 @@ static void ipc_auto_callback(int val)
}
static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
size_t lenp_bef = *lenp;
@@ -106,7 +106,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
ipc_table.data = get_ipc(table);
oldval = *((int *)(ipc_table.data));
- rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos);
+ rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
if (write && !rc && lenp_bef == *lenp) {
int newval = *((int *)(ipc_table.data));
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 24ae46dfe45..8a058711fc1 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -31,24 +31,24 @@ static void *get_mq(ctl_table *table)
return which;
}
-static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp,
+static int proc_mq_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table mq_table;
memcpy(&mq_table, table, sizeof(mq_table));
mq_table.data = get_mq(table);
- return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos);
+ return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
}
static int proc_mq_dointvec_minmax(ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table mq_table;
memcpy(&mq_table, table, sizeof(mq_table));
mq_table.data = get_mq(table);
- return proc_dointvec_minmax(&mq_table, write, filp, buffer,
+ return proc_dointvec_minmax(&mq_table, write, buffer,
lenp, ppos);
}
#else
diff --git a/kernel/Makefile b/kernel/Makefile
index 187c89b4783..b8d4cd8ac0b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -58,7 +58,6 @@ obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup.o
-obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
diff --git a/kernel/audit.c b/kernel/audit.c
index defc2e6f1e3..5feed232be9 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -855,18 +855,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
break;
}
case AUDIT_SIGNAL_INFO:
- err = security_secid_to_secctx(audit_sig_sid, &ctx, &len);
- if (err)
- return err;
+ len = 0;
+ if (audit_sig_sid) {
+ err = security_secid_to_secctx(audit_sig_sid, &ctx, &len);
+ if (err)
+ return err;
+ }
sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
if (!sig_data) {
- security_release_secctx(ctx, len);
+ if (audit_sig_sid)
+ security_release_secctx(ctx, len);
return -ENOMEM;
}
sig_data->uid = audit_sig_uid;
sig_data->pid = audit_sig_pid;
- memcpy(sig_data->ctx, ctx, len);
- security_release_secctx(ctx, len);
+ if (audit_sig_sid) {
+ memcpy(sig_data->ctx, ctx, len);
+ security_release_secctx(ctx, len);
+ }
audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO,
0, 0, sig_data, sizeof(*sig_data) + len);
kfree(sig_data);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 0e96dbc60ea..cc7e87936cb 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -45,8 +45,8 @@
struct audit_watch {
atomic_t count; /* reference count */
- char *path; /* insertion path */
dev_t dev; /* associated superblock device */
+ char *path; /* insertion path */
unsigned long ino; /* associated inode number */
struct audit_parent *parent; /* associated parent */
struct list_head wlist; /* entry in parent->watches list */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 68d3c6a0ecd..267e484f019 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -168,12 +168,12 @@ struct audit_context {
int in_syscall; /* 1 if task is in a syscall */
enum audit_state state, current_state;
unsigned int serial; /* serial number for record */
- struct timespec ctime; /* time of syscall entry */
int major; /* syscall number */
+ struct timespec ctime; /* time of syscall entry */
unsigned long argv[4]; /* syscall arguments */
- int return_valid; /* return code is valid */
long return_code;/* syscall return code */
u64 prio;
+ int return_valid; /* return code is valid */
int name_count;
struct audit_names names[AUDIT_NAMES];
char * filterkey; /* key for rule that triggered record */
@@ -198,8 +198,8 @@ struct audit_context {
char target_comm[TASK_COMM_LEN];
struct audit_tree_refs *trees, *first_trees;
- int tree_count;
struct list_head killed_trees;
+ int tree_count;
int type;
union {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cd83d9933b6..7ccba4bc5e3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -23,6 +23,7 @@
*/
#include <linux/cgroup.h>
+#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/kernel.h>
@@ -48,6 +49,8 @@
#include <linux/namei.h>
#include <linux/smp_lock.h>
#include <linux/pid_namespace.h>
+#include <linux/idr.h>
+#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
#include <asm/atomic.h>
@@ -60,6 +63,8 @@ static struct cgroup_subsys *subsys[] = {
#include <linux/cgroup_subsys.h>
};
+#define MAX_CGROUP_ROOT_NAMELEN 64
+
/*
* A cgroupfs_root represents the root of a cgroup hierarchy,
* and may be associated with a superblock to form an active
@@ -74,6 +79,9 @@ struct cgroupfs_root {
*/
unsigned long subsys_bits;
+ /* Unique id for this hierarchy. */
+ int hierarchy_id;
+
/* The bitmask of subsystems currently attached to this hierarchy */
unsigned long actual_subsys_bits;
@@ -94,6 +102,9 @@ struct cgroupfs_root {
/* The path to use for release notifications. */
char release_agent_path[PATH_MAX];
+
+ /* The name for this hierarchy - may be empty */
+ char name[MAX_CGROUP_ROOT_NAMELEN];
};
/*
@@ -141,6 +152,10 @@ struct css_id {
static LIST_HEAD(roots);
static int root_count;
+static DEFINE_IDA(hierarchy_ida);
+static int next_hierarchy_id;
+static DEFINE_SPINLOCK(hierarchy_id_lock);
+
/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)
@@ -201,6 +216,7 @@ struct cg_cgroup_link {
* cgroup, anchored on cgroup->css_sets
*/
struct list_head cgrp_link_list;
+ struct cgroup *cgrp;
/*
* List running through cg_cgroup_links pointing at a
* single css_set object, anchored on css_set->cg_links
@@ -227,8 +243,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;
-/* hash table for cgroup groups. This improves the performance to
- * find an existing css_set */
+/*
+ * hash table for cgroup groups. This improves the performance to find
+ * an existing css_set. This hash doesn't (currently) take into
+ * account cgroups in empty hierarchies.
+ */
#define CSS_SET_HASH_BITS 7
#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
@@ -248,48 +267,22 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
return &css_set_table[index];
}
+static void free_css_set_rcu(struct rcu_head *obj)
+{
+ struct css_set *cg = container_of(obj, struct css_set, rcu_head);
+ kfree(cg);
+}
+
/* We don't maintain the lists running through each css_set to its
* task until after the first call to cgroup_iter_start(). This
* reduces the fork()/exit() overhead for people who have cgroups
* compiled into their kernel but not actually in use */
static int use_task_css_set_links __read_mostly;
-/* When we create or destroy a css_set, the operation simply
- * takes/releases a reference count on all the cgroups referenced
- * by subsystems in this css_set. This can end up multiple-counting
- * some cgroups, but that's OK - the ref-count is just a
- * busy/not-busy indicator; ensuring that we only count each cgroup
- * once would require taking a global lock to ensure that no
- * subsystems moved between hierarchies while we were doing so.
- *
- * Possible TODO: decide at boot time based on the number of
- * registered subsystems and the number of CPUs or NUMA nodes whether
- * it's better for performance to ref-count every subsystem, or to
- * take a global lock and only add one ref count to each hierarchy.
- */
-
-/*
- * unlink a css_set from the list and free it
- */
-static void unlink_css_set(struct css_set *cg)
+static void __put_css_set(struct css_set *cg, int taskexit)
{
struct cg_cgroup_link *link;
struct cg_cgroup_link *saved_link;
-
- hlist_del(&cg->hlist);
- css_set_count--;
-
- list_for_each_entry_safe(link, saved_link, &cg->cg_links,
- cg_link_list) {
- list_del(&link->cg_link_list);
- list_del(&link->cgrp_link_list);
- kfree(link);
- }
-}
-
-static void __put_css_set(struct css_set *cg, int taskexit)
-{
- int i;
/*
* Ensure that the refcount doesn't hit zero while any readers
* can see it. Similar to atomic_dec_and_lock(), but for an
@@ -302,21 +295,28 @@ static void __put_css_set(struct css_set *cg, int taskexit)
write_unlock(&css_set_lock);
return;
}
- unlink_css_set(cg);
- write_unlock(&css_set_lock);
- rcu_read_lock();
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
- struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
+ /* This css_set is dead. unlink it and release cgroup refcounts */
+ hlist_del(&cg->hlist);
+ css_set_count--;
+
+ list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+ cg_link_list) {
+ struct cgroup *cgrp = link->cgrp;
+ list_del(&link->cg_link_list);
+ list_del(&link->cgrp_link_list);
if (atomic_dec_and_test(&cgrp->count) &&
notify_on_release(cgrp)) {
if (taskexit)
set_bit(CGRP_RELEASABLE, &cgrp->flags);
check_for_release(cgrp);
}
+
+ kfree(link);
}
- rcu_read_unlock();
- kfree(cg);
+
+ write_unlock(&css_set_lock);
+ call_rcu(&cg->rcu_head, free_css_set_rcu);
}
/*
@@ -338,6 +338,78 @@ static inline void put_css_set_taskexit(struct css_set *cg)
}
/*
+ * compare_css_sets - helper function for find_existing_css_set().
+ * @cg: candidate css_set being tested
+ * @old_cg: existing css_set for a task
+ * @new_cgrp: cgroup that's being entered by the task
+ * @template: desired set of css pointers in css_set (pre-calculated)
+ *
+ * Returns true if "cg" matches "old_cg" except for the hierarchy
+ * which "new_cgrp" belongs to, for which it should match "new_cgrp".
+ */
+static bool compare_css_sets(struct css_set *cg,
+ struct css_set *old_cg,
+ struct cgroup *new_cgrp,
+ struct cgroup_subsys_state *template[])
+{
+ struct list_head *l1, *l2;
+
+ if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
+ /* Not all subsystems matched */
+ return false;
+ }
+
+ /*
+ * Compare cgroup pointers in order to distinguish between
+ * different cgroups in hierarchies with no subsystems. We
+ * could get by with just this check alone (and skip the
+ * memcmp above) but on most setups the memcmp check will
+ * avoid the need for this more expensive check on almost all
+ * candidates.
+ */
+
+ l1 = &cg->cg_links;
+ l2 = &old_cg->cg_links;
+ while (1) {
+ struct cg_cgroup_link *cgl1, *cgl2;
+ struct cgroup *cg1, *cg2;
+
+ l1 = l1->next;
+ l2 = l2->next;
+ /* See if we reached the end - both lists are equal length. */
+ if (l1 == &cg->cg_links) {
+ BUG_ON(l2 != &old_cg->cg_links);
+ break;
+ } else {
+ BUG_ON(l2 == &old_cg->cg_links);
+ }
+ /* Locate the cgroups associated with these links. */
+ cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
+ cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
+ cg1 = cgl1->cgrp;
+ cg2 = cgl2->cgrp;
+ /* Hierarchies should be linked in the same order. */
+ BUG_ON(cg1->root != cg2->root);
+
+ /*
+ * If this hierarchy is the hierarchy of the cgroup
+ * that's changing, then we need to check that this
+ * css_set points to the new cgroup; if it's any other
+ * hierarchy, then this css_set should point to the
+ * same cgroup as the old css_set.
+ */
+ if (cg1->root == new_cgrp->root) {
+ if (cg1 != new_cgrp)
+ return false;
+ } else {
+ if (cg1 != cg2)
+ return false;
+ }
+ }
+ return true;
+}
+
+/*
* find_existing_css_set() is a helper for
* find_css_set(), and checks to see whether an existing
* css_set is suitable.
@@ -378,10 +450,11 @@ static struct css_set *find_existing_css_set(
hhead = css_set_hash(template);
hlist_for_each_entry(cg, node, hhead, hlist) {
- if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
- /* All subsystems matched */
- return cg;
- }
+ if (!compare_css_sets(cg, oldcg, cgrp, template))
+ continue;
+
+ /* This css_set matches what we need */
+ return cg;
}
/* No existing cgroup group matched */
@@ -435,8 +508,14 @@ static void link_css_set(struct list_head *tmp_cg_links,
link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
cgrp_link_list);
link->cg = cg;
+ link->cgrp = cgrp;
+ atomic_inc(&cgrp->count);
list_move(&link->cgrp_link_list, &cgrp->css_sets);
- list_add(&link->cg_link_list, &cg->cg_links);
+ /*
+ * Always add links to the tail of the list so that the list
+ * is sorted by order of hierarchy creation
+ */
+ list_add_tail(&link->cg_link_list, &cg->cg_links);
}
/*
@@ -451,11 +530,11 @@ static struct css_set *find_css_set(
{
struct css_set *res;
struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
- int i;
struct list_head tmp_cg_links;
struct hlist_head *hhead;
+ struct cg_cgroup_link *link;
/* First see if we already have a cgroup group that matches
* the desired set */
@@ -489,20 +568,12 @@ static struct css_set *find_css_set(
write_lock(&css_set_lock);
/* Add reference counts and links from the new css_set. */
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
- struct cgroup *cgrp = res->subsys[i]->cgroup;
- struct cgroup_subsys *ss = subsys[i];
- atomic_inc(&cgrp->count);
- /*
- * We want to add a link once per cgroup, so we
- * only do it for the first subsystem in each
- * hierarchy
- */
- if (ss->root->subsys_list.next == &ss->sibling)
- link_css_set(&tmp_cg_links, res, cgrp);
+ list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
+ struct cgroup *c = link->cgrp;
+ if (c->root == cgrp->root)
+ c = cgrp;
+ link_css_set(&tmp_cg_links, res, c);
}
- if (list_empty(&rootnode.subsys_list))
- link_css_set(&tmp_cg_links, res, dummytop);
BUG_ON(!list_empty(&tmp_cg_links));
@@ -518,6 +589,41 @@ static struct css_set *find_css_set(
}
/*
+ * Return the cgroup for "task" from the given hierarchy. Must be
+ * called with cgroup_mutex held.
+ */
+static struct cgroup *task_cgroup_from_root(struct task_struct *task,
+ struct cgroupfs_root *root)
+{
+ struct css_set *css;
+ struct cgroup *res = NULL;
+
+ BUG_ON(!mutex_is_locked(&cgroup_mutex));
+ read_lock(&css_set_lock);
+ /*
+ * No need to lock the task - since we hold cgroup_mutex the
+ * task can't change groups, so the only thing that can happen
+ * is that it exits and its css is set back to init_css_set.
+ */
+ css = task->cgroups;
+ if (css == &init_css_set) {
+ res = &root->top_cgroup;
+ } else {
+ struct cg_cgroup_link *link;
+ list_for_each_entry(link, &css->cg_links, cg_link_list) {
+ struct cgroup *c = link->cgrp;
+ if (c->root == root) {
+ res = c;
+ break;
+ }
+ }
+ }
+ read_unlock(&css_set_lock);
+ BUG_ON(!res);
+ return res;
+}
+
+/*
* There is one global cgroup mutex. We also require taking
* task_lock() when dereferencing a task's cgroup subsys pointers.
* See "The task_lock() exception", at the end of this comment.
@@ -677,6 +783,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
*/
deactivate_super(cgrp->root->sb);
+ /*
+ * if we're getting rid of the cgroup, refcount should ensure
+ * that there are no pidlists left.
+ */
+ BUG_ON(!list_empty(&cgrp->pidlists));
+
call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
}
iput(inode);
@@ -841,6 +953,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",noprefix");
if (strlen(root->release_agent_path))
seq_printf(seq, ",release_agent=%s", root->release_agent_path);
+ if (strlen(root->name))
+ seq_printf(seq, ",name=%s", root->name);
mutex_unlock(&cgroup_mutex);
return 0;
}
@@ -849,6 +963,12 @@ struct cgroup_sb_opts {
unsigned long subsys_bits;
unsigned long flags;
char *release_agent;
+ char *name;
+ /* User explicitly requested empty subsystem */
+ bool none;
+
+ struct cgroupfs_root *new_root;
+
};
/* Convert a hierarchy specifier into a bitmask of subsystems and
@@ -863,9 +983,7 @@ static int parse_cgroupfs_options(char *data,
mask = ~(1UL << cpuset_subsys_id);
#endif
- opts->subsys_bits = 0;
- opts->flags = 0;
- opts->release_agent = NULL;
+ memset(opts, 0, sizeof(*opts));
while ((token = strsep(&o, ",")) != NULL) {
if (!*token)
@@ -879,17 +997,42 @@ static int parse_cgroupfs_options(char *data,
if (!ss->disabled)
opts->subsys_bits |= 1ul << i;
}
+ } else if (!strcmp(token, "none")) {
+ /* Explicitly have no subsystems */
+ opts->none = true;
} else if (!strcmp(token, "noprefix")) {
set_bit(ROOT_NOPREFIX, &opts->flags);
} else if (!strncmp(token, "release_agent=", 14)) {
/* Specifying two release agents is forbidden */
if (opts->release_agent)
return -EINVAL;
- opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
+ opts->release_agent =
+ kstrndup(token + 14, PATH_MAX, GFP_KERNEL);
if (!opts->release_agent)
return -ENOMEM;
- strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
- opts->release_agent[PATH_MAX - 1] = 0;
+ } else if (!strncmp(token, "name=", 5)) {
+ int i;
+ const char *name = token + 5;
+ /* Can't specify an empty name */
+ if (!strlen(name))
+ return -EINVAL;
+ /* Must match [\w.-]+ */
+ for (i = 0; i < strlen(name); i++) {
+ char c = name[i];
+ if (isalnum(c))
+ continue;
+ if ((c == '.') || (c == '-') || (c == '_'))
+ continue;
+ return -EINVAL;
+ }
+ /* Specifying two names is forbidden */
+ if (opts->name)
+ return -EINVAL;
+ opts->name = kstrndup(name,
+ MAX_CGROUP_ROOT_NAMELEN,
+ GFP_KERNEL);
+ if (!opts->name)
+ return -ENOMEM;
} else {
struct cgroup_subsys *ss;
int i;
@@ -906,6 +1049,8 @@ static int parse_cgroupfs_options(char *data,
}
}
+ /* Consistency checks */
+
/*
* Option noprefix was introduced just for backward compatibility
* with the old cpuset, so we allow noprefix only if mounting just
@@ -915,8 +1060,16 @@ static int parse_cgroupfs_options(char *data,
(opts->subsys_bits & mask))
return -EINVAL;
- /* We can't have an empty hierarchy */
- if (!opts->subsys_bits)
+
+ /* Can't specify "none" and some subsystems */
+ if (opts->subsys_bits && opts->none)
+ return -EINVAL;
+
+ /*
+ * We either have to specify by name or by subsystems. (So all
+ * empty hierarchies must have a name).
+ */
+ if (!opts->subsys_bits && !opts->name)
return -EINVAL;
return 0;
@@ -944,6 +1097,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
goto out_unlock;
}
+ /* Don't allow name to change at remount */
+ if (opts.name && strcmp(opts.name, root->name)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
ret = rebind_subsystems(root, opts.subsys_bits);
if (ret)
goto out_unlock;
@@ -955,6 +1114,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
strcpy(root->release_agent_path, opts.release_agent);
out_unlock:
kfree(opts.release_agent);
+ kfree(opts.name);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
unlock_kernel();
@@ -974,9 +1134,10 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
INIT_LIST_HEAD(&cgrp->children);
INIT_LIST_HEAD(&cgrp->css_sets);
INIT_LIST_HEAD(&cgrp->release_list);
- INIT_LIST_HEAD(&cgrp->pids_list);
- init_rwsem(&cgrp->pids_mutex);
+ INIT_LIST_HEAD(&cgrp->pidlists);
+ mutex_init(&cgrp->pidlist_mutex);
}
+
static void init_cgroup_root(struct cgroupfs_root *root)
{
struct cgroup *cgrp = &root->top_cgroup;
@@ -988,33 +1149,106 @@ static void init_cgroup_root(struct cgroupfs_root *root)
init_cgroup_housekeeping(cgrp);
}
+static bool init_root_id(struct cgroupfs_root *root)
+{
+ int ret = 0;
+
+ do {
+ if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
+ return false;
+ spin_lock(&hierarchy_id_lock);
+ /* Try to allocate the next unused ID */
+ ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
+ &root->hierarchy_id);
+ if (ret == -ENOSPC)
+ /* Try again starting from 0 */
+ ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
+ if (!ret) {
+ next_hierarchy_id = root->hierarchy_id + 1;
+ } else if (ret != -EAGAIN) {
+ /* Can only get here if the 31-bit IDR is full ... */
+ BUG_ON(ret);
+ }
+ spin_unlock(&hierarchy_id_lock);
+ } while (ret);
+ return true;
+}
+
static int cgroup_test_super(struct super_block *sb, void *data)
{
- struct cgroupfs_root *new = data;
+ struct cgroup_sb_opts *opts = data;
struct cgroupfs_root *root = sb->s_fs_info;
- /* First check subsystems */
- if (new->subsys_bits != root->subsys_bits)
- return 0;
+ /* If we asked for a name then it must match */
+ if (opts->name && strcmp(opts->name, root->name))
+ return 0;
- /* Next check flags */
- if (new->flags != root->flags)
+ /*
+ * If we asked for subsystems (or explicitly for no
+ * subsystems) then they must match
+ */
+ if ((opts->subsys_bits || opts->none)
+ && (opts->subsys_bits != root->subsys_bits))
return 0;
return 1;
}
+static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
+{
+ struct cgroupfs_root *root;
+
+ if (!opts->subsys_bits && !opts->none)
+ return NULL;
+
+ root = kzalloc(sizeof(*root), GFP_KERNEL);
+ if (!root)
+ return ERR_PTR(-ENOMEM);
+
+ if (!init_root_id(root)) {
+ kfree(root);
+ return ERR_PTR(-ENOMEM);
+ }
+ init_cgroup_root(root);
+
+ root->subsys_bits = opts->subsys_bits;
+ root->flags = opts->flags;
+ if (opts->release_agent)
+ strcpy(root->release_agent_path, opts->release_agent);
+ if (opts->name)
+ strcpy(root->name, opts->name);
+ return root;
+}
+
+static void cgroup_drop_root(struct cgroupfs_root *root)
+{
+ if (!root)
+ return;
+
+ BUG_ON(!root->hierarchy_id);
+ spin_lock(&hierarchy_id_lock);
+ ida_remove(&hierarchy_ida, root->hierarchy_id);
+ spin_unlock(&hierarchy_id_lock);
+ kfree(root);
+}
+
static int cgroup_set_super(struct super_block *sb, void *data)
{
int ret;
- struct cgroupfs_root *root = data;
+ struct cgroup_sb_opts *opts = data;
+
+ /* If we don't have a new root, we can't set up a new sb */
+ if (!opts->new_root)
+ return -EINVAL;
+
+ BUG_ON(!opts->subsys_bits && !opts->none);
ret = set_anon_super(sb, NULL);
if (ret)
return ret;
- sb->s_fs_info = root;
- root->sb = sb;
+ sb->s_fs_info = opts->new_root;
+ opts->new_root->sb = sb;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -1051,48 +1285,43 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
void *data, struct vfsmount *mnt)
{
struct cgroup_sb_opts opts;
+ struct cgroupfs_root *root;
int ret = 0;
struct super_block *sb;
- struct cgroupfs_root *root;
- struct list_head tmp_cg_links;
+ struct cgroupfs_root *new_root;
/* First find the desired set of subsystems */
ret = parse_cgroupfs_options(data, &opts);
- if (ret) {
- kfree(opts.release_agent);
- return ret;
- }
-
- root = kzalloc(sizeof(*root), GFP_KERNEL);
- if (!root) {
- kfree(opts.release_agent);
- return -ENOMEM;
- }
+ if (ret)
+ goto out_err;
- init_cgroup_root(root);
- root->subsys_bits = opts.subsys_bits;
- root->flags = opts.flags;
- if (opts.release_agent) {
- strcpy(root->release_agent_path, opts.release_agent);
- kfree(opts.release_agent);
+ /*
+ * Allocate a new cgroup root. We may not need it if we're
+ * reusing an existing hierarchy.
+ */
+ new_root = cgroup_root_from_opts(&opts);
+ if (IS_ERR(new_root)) {
+ ret = PTR_ERR(new_root);
+ goto out_err;
}
+ opts.new_root = new_root;
- sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
-
+ /* Locate an existing or new sb for this hierarchy */
+ sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
if (IS_ERR(sb)) {
- kfree(root);
- return PTR_ERR(sb);
+ ret = PTR_ERR(sb);
+ cgroup_drop_root(opts.new_root);
+ goto out_err;
}
- if (sb->s_fs_info != root) {
- /* Reusing an existing superblock */
- BUG_ON(sb->s_root == NULL);
- kfree(root);
- root = NULL;
- } else {
- /* New superblock */
+ root = sb->s_fs_info;
+ BUG_ON(!root);
+ if (root == opts.new_root) {
+ /* We used the new root structure, so this is a new hierarchy */
+ struct list_head tmp_cg_links;
struct cgroup *root_cgrp = &root->top_cgroup;
struct inode *inode;
+ struct cgroupfs_root *existing_root;
int i;
BUG_ON(sb->s_root != NULL);
@@ -1105,6 +1334,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
+ if (strlen(root->name)) {
+ /* Check for name clashes with existing mounts */
+ for_each_active_root(existing_root) {
+ if (!strcmp(existing_root->name, root->name)) {
+ ret = -EBUSY;
+ mutex_unlock(&cgroup_mutex);
+ mutex_unlock(&inode->i_mutex);
+ goto drop_new_super;
+ }
+ }
+ }
+
/*
* We're accessing css_set_count without locking
* css_set_lock here, but that's OK - it can only be
@@ -1123,7 +1364,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
if (ret == -EBUSY) {
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
- goto free_cg_links;
+ free_cg_links(&tmp_cg_links);
+ goto drop_new_super;
}
/* EBUSY should be the only error here */
@@ -1155,17 +1397,27 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
BUG_ON(root->number_of_cgroups != 1);
cgroup_populate_dir(root_cgrp);
- mutex_unlock(&inode->i_mutex);
mutex_unlock(&cgroup_mutex);
+ mutex_unlock(&inode->i_mutex);
+ } else {
+ /*
+ * We re-used an existing hierarchy - the new root (if
+ * any) is not needed
+ */
+ cgroup_drop_root(opts.new_root);
}
simple_set_mnt(mnt, sb);
+ kfree(opts.release_agent);
+ kfree(opts.name);
return 0;
- free_cg_links:
- free_cg_links(&tmp_cg_links);
drop_new_super:
deactivate_locked_super(sb);
+ out_err:
+ kfree(opts.release_agent);
+ kfree(opts.name);
+
return ret;
}
@@ -1211,7 +1463,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
mutex_unlock(&cgroup_mutex);
kill_litter_super(sb);
- kfree(root);
+ cgroup_drop_root(root);
}
static struct file_system_type cgroup_fs_type = {
@@ -1276,27 +1528,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
return 0;
}
-/*
- * Return the first subsystem attached to a cgroup's hierarchy, and
- * its subsystem id.
- */
-
-static void get_first_subsys(const struct cgroup *cgrp,
- struct cgroup_subsys_state **css, int *subsys_id)
-{
- const struct cgroupfs_root *root = cgrp->root;
- const struct cgroup_subsys *test_ss;
- BUG_ON(list_empty(&root->subsys_list));
- test_ss = list_entry(root->subsys_list.next,
- struct cgroup_subsys, sibling);
- if (css) {
- *css = cgrp->subsys[test_ss->subsys_id];
- BUG_ON(!*css);
- }
- if (subsys_id)
- *subsys_id = test_ss->subsys_id;
-}
-
/**
* cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
* @cgrp: the cgroup the task is attaching to
@@ -1313,18 +1544,15 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
struct css_set *cg;
struct css_set *newcg;
struct cgroupfs_root *root = cgrp->root;
- int subsys_id;
-
- get_first_subsys(cgrp, NULL, &subsys_id);
/* Nothing to do if the task is already in that cgroup */
- oldcgrp = task_cgroup(tsk, subsys_id);
+ oldcgrp = task_cgroup_from_root(tsk, root);
if (cgrp == oldcgrp)
return 0;
for_each_subsys(root, ss) {
if (ss->can_attach) {
- retval = ss->can_attach(ss, cgrp, tsk);
+ retval = ss->can_attach(ss, cgrp, tsk, false);
if (retval)
return retval;
}
@@ -1362,7 +1590,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
for_each_subsys(root, ss) {
if (ss->attach)
- ss->attach(ss, cgrp, oldcgrp, tsk);
+ ss->attach(ss, cgrp, oldcgrp, tsk, false);
}
set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
synchronize_rcu();
@@ -1423,15 +1651,6 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
return ret;
}
-/* The various types of files and directories in a cgroup file system */
-enum cgroup_filetype {
- FILE_ROOT,
- FILE_DIR,
- FILE_TASKLIST,
- FILE_NOTIFY_ON_RELEASE,
- FILE_RELEASE_AGENT,
-};
-
/**
* cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
* @cgrp: the cgroup to be checked for liveness
@@ -1876,7 +2095,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
* the start of a css_set
*/
static void cgroup_advance_iter(struct cgroup *cgrp,
- struct cgroup_iter *it)
+ struct cgroup_iter *it)
{
struct list_head *l = it->cg_link;
struct cg_cgroup_link *link;
@@ -2129,7 +2348,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
}
/*
- * Stuff for reading the 'tasks' file.
+ * Stuff for reading the 'tasks'/'procs' files.
*
* Reading this file can return large amounts of data if a cgroup has
* *lots* of attached tasks. So it may need several calls to read(),
@@ -2139,27 +2358,196 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
*/
/*
- * Load into 'pidarray' up to 'npids' of the tasks using cgroup
- * 'cgrp'. Return actual number of pids loaded. No need to
- * task_lock(p) when reading out p->cgroup, since we're in an RCU
- * read section, so the css_set can't go away, and is
- * immutable after creation.
+ * The following three functions "fix" the issue where there are more pids
+ * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
+ * TODO: replace with a kernel-wide solution to this problem
+ */
+#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
+/*
+ * Allocate room for 'count' pids. Requests of up to two pages go to
+ * kmalloc(); anything bigger falls back to vmalloc(). The result is
+ * released with pidlist_free(), which tells the two cases apart.
+ */
+static void *pidlist_allocate(int count)
+{
+	size_t bytes = count * sizeof(pid_t);
+
+	if (!PIDLIST_TOO_LARGE(count))
+		return kmalloc(bytes, GFP_KERNEL);
+	return vmalloc(bytes);
+}
+/*
+ * Release a pid array, using vfree() for vmalloc addresses and kfree()
+ * for everything else.
+ */
+static void pidlist_free(void *p)
+{
+	if (!is_vmalloc_addr(p)) {
+		kfree(p);
+		return;
+	}
+	vfree(p);
+}
+/*
+ * Re-size pid array 'p' to 'newcount' entries. 'newcount' entries are
+ * copied out of the old array in the vmalloc case, so newcount must not
+ * exceed the current size. Returns the new array, or NULL if the
+ * allocation failed - in which case 'p' has not been freed and is still
+ * valid.
+ */
+static void *pidlist_resize(void *p, int newcount)
+{
+	size_t bytes = newcount * sizeof(pid_t);
+	void *fresh;
+
+	/* anything that is not a vmalloc area is handed to krealloc() */
+	if (!is_vmalloc_addr(p))
+		return krealloc(p, bytes, GFP_KERNEL);
+
+	/* vmalloc area: allocate a new one, copy, then drop the old one */
+	fresh = vmalloc(bytes);
+	if (!fresh)
+		return NULL;
+	memcpy(fresh, p, bytes);
+	vfree(p);
+	return fresh;
+}
+
+/*
+ * pidlist_uniq - strip duplicate entries out of a pid array
+ * @p: in/out pointer to the array; updated if the array is re-allocated
+ * @length: number of entries in *p
+ *
+ * Only adjacent duplicates are detected, so the array must be sorted.
+ * If the stripped list is sufficiently smaller and there's enough memory
+ * to allocate a new buffer, will let go of the unneeded memory. Returns the
+ * number of unique elements.
+ */
+/*
+ * is the size difference enough that we should re-allocate the array?
+ * 'old' and 'new' are element counts; the saving is compared in bytes and
+ * nothing is subtracted from 'old' alone, so the test cannot wrap around
+ * for lists shorter than PAGE_SIZE.
+ */
+#define PIDLIST_REALLOC_DIFFERENCE(old, new) \
+	((old) > (new) && ((old) - (new)) * sizeof(pid_t) >= PAGE_SIZE)
+static int pidlist_uniq(pid_t **p, int length)
+{
+	int src, dest = 1;
+	pid_t *list = *p;
+	pid_t *newlist;
+
+	/* trivial edge cases first; no work needs to be done for either */
+	if (length == 0 || length == 1)
+		return length;
+	/*
+	 * the 0th element is unique by definition, so both cursors start at
+	 * 1. src visits every entry; dest is where the next unique entry is
+	 * stored, and therefore also the number of unique entries so far.
+	 */
+	for (src = 1; src < length; src++) {
+		if (list[src] == list[src - 1])
+			continue;
+		list[dest++] = list[src];
+	}
+	/*
+	 * if the length difference is large enough, we want to allocate a
+	 * smaller buffer to save memory. if this fails due to out of memory,
+	 * we'll just stay with what we've got.
+	 */
+	if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
+		newlist = pidlist_resize(list, dest);
+		if (newlist)
+			*p = newlist;
+	}
+	return dest;
+}
+
+/* sort() comparison callback: orders two pid_t values ascending */
+static int cmppid(const void *a, const void *b)
+{
+	const pid_t lhs = *(const pid_t *)a;
+	const pid_t rhs = *(const pid_t *)b;
+
+	return lhs - rhs;
+}
+
+/*
+ * find the appropriate pidlist for our purpose (given procs vs tasks)
+ * returns with the lock on that pidlist already held, or returns NULL
+ * with no locks held if we're out of memory. the use count is left
+ * untouched; the caller takes its own reference while holding l->mutex.
*/
-static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
+static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
+						  enum cgroup_filetype type)
{
- int n = 0, pid;
+	struct cgroup_pidlist *l;
+	/* don't need task_nsproxy() if we're looking at ourself */
+	struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns);
+	/*
+	 * We can't drop the pidlist_mutex before taking the l->mutex in case
+	 * the last ref-holder is trying to remove l from the list at the same
+	 * time. Holding the pidlist_mutex precludes somebody taking whichever
+	 * list we find out from under us - compare cgroup_release_pid_array().
+	 */
+	mutex_lock(&cgrp->pidlist_mutex);
+	list_for_each_entry(l, &cgrp->pidlists, links) {
+		if (l->key.type == type && l->key.ns == ns) {
+			/* found a matching list - drop the extra refcount */
+			put_pid_ns(ns);
+			/* make sure l doesn't vanish out from under us */
+			down_write(&l->mutex);
+			mutex_unlock(&cgrp->pidlist_mutex);
+			/*
+			 * don't increment use_count here either: the caller
+			 * (pidlist_array_load) does it for found and new
+			 * lists alike, and counting an open twice would keep
+			 * the list from ever being released.
+			 */
+			return l;
+		}
+	}
+	/* entry not found; create a new one */
+	l = kmalloc(sizeof(*l), GFP_KERNEL);
+	if (!l) {
+		mutex_unlock(&cgrp->pidlist_mutex);
+		put_pid_ns(ns);
+		return NULL;
+	}
+	init_rwsem(&l->mutex);
+	down_write(&l->mutex);
+	l->key.type = type;
+	l->key.ns = ns;	/* the list keeps the namespace reference taken above */
+	l->use_count = 0; /* don't increment here */
+	l->list = NULL;
+	l->owner = cgrp;
+	list_add(&l->links, &cgrp->pidlists);
+	mutex_unlock(&cgrp->pidlist_mutex);
+	return l;
+}
+
+/*
+ * Load a cgroup's pidarray with either procs' tgids or tasks' pids
+ *
+ * Builds a sorted array of the ids currently in 'cgrp' (tgids for
+ * CGROUP_FILE_PROCS, with duplicates stripped; pids otherwise), stores it
+ * in the pidlist returned by cgroup_pidlist_find() and takes a use_count
+ * reference on that pidlist for the caller. On success returns 0 with
+ * *lp set to the pidlist; returns -ENOMEM if either allocation fails.
+ */
+static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
+ struct cgroup_pidlist **lp)
+{
+ pid_t *array;
+ int length;
+ int pid, n = 0; /* used for populating the array */
struct cgroup_iter it;
struct task_struct *tsk;
+ struct cgroup_pidlist *l;
+
+ /*
+ * If cgroup gets more users after we read count, we won't have
+ * enough space - tough. This race is indistinguishable to the
+ * caller from the case that the additional cgroup users didn't
+ * show up until sometime later on.
+ */
+ length = cgroup_task_count(cgrp);
+ array = pidlist_allocate(length);
+ if (!array)
+ return -ENOMEM;
+ /* now, populate the array */
cgroup_iter_start(cgrp, &it);
while ((tsk = cgroup_iter_next(cgrp, &it))) {
- if (unlikely(n == npids))
+ if (unlikely(n == length))
break;
- pid = task_pid_vnr(tsk);
- if (pid > 0)
- pidarray[n++] = pid;
+ /* get tgid or pid for procs or tasks file respectively */
+ if (type == CGROUP_FILE_PROCS)
+ pid = task_tgid_vnr(tsk);
+ else
+ pid = task_pid_vnr(tsk);
+ if (pid > 0) /* make sure to only use valid results */
+ array[n++] = pid;
}
cgroup_iter_end(cgrp, &it);
- return n;
+ length = n;
+ /* now sort & (if procs) strip out duplicates */
+ sort(array, length, sizeof(pid_t), cmppid, NULL);
+ if (type == CGROUP_FILE_PROCS)
+ length = pidlist_uniq(&array, length);
+ l = cgroup_pidlist_find(cgrp, type);
+ if (!l) {
+ pidlist_free(array);
+ return -ENOMEM;
+ }
+ /*
+ * store array, freeing old if necessary - l->mutex is already held
+ * for writing (taken in cgroup_pidlist_find) and is dropped below
+ */
+ pidlist_free(l->list);
+ l->list = array;
+ l->length = length;
+ l->use_count++;
+ up_write(&l->mutex);
+ *lp = l;
+ return 0;
}
/**
@@ -2216,37 +2604,14 @@ err:
return ret;
}
-/*
- * Cache pids for all threads in the same pid namespace that are
- * opening the same "tasks" file.
- */
-struct cgroup_pids {
- /* The node in cgrp->pids_list */
- struct list_head list;
- /* The cgroup those pids belong to */
- struct cgroup *cgrp;
- /* The namepsace those pids belong to */
- struct pid_namespace *ns;
- /* Array of process ids in the cgroup */
- pid_t *tasks_pids;
- /* How many files are using the this tasks_pids array */
- int use_count;
- /* Length of the current tasks_pids array */
- int length;
-};
-
-static int cmppid(const void *a, const void *b)
-{
- return *(pid_t *)a - *(pid_t *)b;
-}
/*
- * seq_file methods for the "tasks" file. The seq_file position is the
+ * seq_file methods for the tasks/procs files. The seq_file position is the
* next pid to display; the seq_file iterator is a pointer to the pid
- * in the cgroup->tasks_pids array.
+ * in the pidlist's l->list array.
*/
-static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
+static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
/*
* Initially we receive a position value that corresponds to
@@ -2254,48 +2619,45 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
* after a seek to the start). Use a binary-search to find the
* next pid to display, if any
*/
- struct cgroup_pids *cp = s->private;
- struct cgroup *cgrp = cp->cgrp;
+ struct cgroup_pidlist *l = s->private;
int index = 0, pid = *pos;
int *iter;
- down_read(&cgrp->pids_mutex);
+ down_read(&l->mutex);
if (pid) {
- int end = cp->length;
+ int end = l->length;
while (index < end) {
int mid = (index + end) / 2;
- if (cp->tasks_pids[mid] == pid) {
+ if (l->list[mid] == pid) {
index = mid;
break;
- } else if (cp->tasks_pids[mid] <= pid)
+ } else if (l->list[mid] <= pid)
index = mid + 1;
else
end = mid;
}
}
/* If we're off the end of the array, we're done */
- if (index >= cp->length)
+ if (index >= l->length)
return NULL;
/* Update the abstract position to be the actual pid that we found */
- iter = cp->tasks_pids + index;
+ iter = l->list + index;
*pos = *iter;
return iter;
}
-static void cgroup_tasks_stop(struct seq_file *s, void *v)
+/* seq_file stop callback: drop the pidlist read lock */
+static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
- struct cgroup_pids *cp = s->private;
- struct cgroup *cgrp = cp->cgrp;
- up_read(&cgrp->pids_mutex);
+ struct cgroup_pidlist *l = s->private;
+ up_read(&l->mutex); /* pairs with down_read() in cgroup_pidlist_start() */
}
-static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
+static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct cgroup_pids *cp = s->private;
- int *p = v;
- int *end = cp->tasks_pids + cp->length;
-
+ struct cgroup_pidlist *l = s->private;
+ pid_t *p = v;
+ pid_t *end = l->list + l->length;
/*
* Advance to the next pid in the array. If this goes off the
* end, we're done
@@ -2309,124 +2671,107 @@ static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
}
}
-static int cgroup_tasks_show(struct seq_file *s, void *v)
+/* seq_file show callback: print the id that iterator 'v' points at, one per line */
+static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
return seq_printf(s, "%d\n", *(int *)v);
}
-static const struct seq_operations cgroup_tasks_seq_operations = {
- .start = cgroup_tasks_start,
- .stop = cgroup_tasks_stop,
- .next = cgroup_tasks_next,
- .show = cgroup_tasks_show,
+/*
+ * seq_operations functions for iterating on pidlists through seq_file -
+ * independent of whether it's tasks or procs. The pidlist being walked
+ * is found in seq_file->private.
+ */
+static const struct seq_operations cgroup_pidlist_seq_operations = {
+ .start = cgroup_pidlist_start, /* takes l->mutex for reading */
+ .stop = cgroup_pidlist_stop, /* releases l->mutex */
+ .next = cgroup_pidlist_next,
+ .show = cgroup_pidlist_show,
};
-static void release_cgroup_pid_array(struct cgroup_pids *cp)
+/*
+ * Drop one use_count reference on pidlist 'l'. When the count reaches
+ * zero the pidlist is unlinked from its owning cgroup and freed together
+ * with its array and its pid namespace reference.
+ */
+static void cgroup_release_pid_array(struct cgroup_pidlist *l)
{
- struct cgroup *cgrp = cp->cgrp;
-
- down_write(&cgrp->pids_mutex);
- BUG_ON(!cp->use_count);
- if (!--cp->use_count) {
- list_del(&cp->list);
- put_pid_ns(cp->ns);
- kfree(cp->tasks_pids);
- kfree(cp);
+ /*
+ * the case where we're the last user of this particular pidlist will
+ * have us remove it from the cgroup's list, which entails taking the
+ * cgroup's pidlist_mutex. since in cgroup_pidlist_find the pidlist's
+ * own l->mutex is taken while cgroup->pidlist_mutex is held, we have
+ * to take pidlist_mutex first here as well.
+ */
+ mutex_lock(&l->owner->pidlist_mutex);
+ down_write(&l->mutex);
+ BUG_ON(!l->use_count);
+ if (!--l->use_count) {
+ /* we're the last user if refcount is 0; remove and free */
+ list_del(&l->links);
+ mutex_unlock(&l->owner->pidlist_mutex);
+ pidlist_free(l->list);
+ put_pid_ns(l->key.ns);
+ up_write(&l->mutex);
+ kfree(l);
+ return;
}
- up_write(&cgrp->pids_mutex);
+ mutex_unlock(&l->owner->pidlist_mutex);
+ up_write(&l->mutex);
}
-static int cgroup_tasks_release(struct inode *inode, struct file *file)
+/*
+ * release() handler for pidlist files: drops the pidlist reference held
+ * by this open file and tears down its seq_file. Files not opened for
+ * reading have nothing to release and return 0.
+ */
+static int cgroup_pidlist_release(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
- struct cgroup_pids *cp;
-
+ struct cgroup_pidlist *l;
if (!(file->f_mode & FMODE_READ))
return 0;
-
- seq = file->private_data;
- cp = seq->private;
-
- release_cgroup_pid_array(cp);
+ /*
+ * the seq_file will only be initialized if the file was opened for
+ * reading; hence private_data is only dereferenced after the
+ * FMODE_READ check above.
+ */
+ l = ((struct seq_file *)file->private_data)->private;
+ cgroup_release_pid_array(l);
return seq_release(inode, file);
}
-static struct file_operations cgroup_tasks_operations = {
+/* file operations installed on a pidlist file by cgroup_pidlist_open() */
+static const struct file_operations cgroup_pidlist_operations = {
.read = seq_read,
.llseek = seq_lseek,
.write = cgroup_file_write,
- .release = cgroup_tasks_release,
+ .release = cgroup_pidlist_release, /* drops the pidlist reference */
};
/*
- * Handle an open on 'tasks' file. Prepare an array containing the
- * process id's of tasks currently attached to the cgroup being opened.
+ * The following functions handle opens on a file that displays a pidlist
+ * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
+ * in the cgroup.
*/
-
-static int cgroup_tasks_open(struct inode *unused, struct file *file)
+/*
+ * helper function for the two below it: loads the pidlist of the given
+ * type for the cgroup owning 'file', switches the file over to the
+ * seq_file based pidlist operations and stores the pidlist in the
+ * seq_file's private pointer. Returns 0 on success (or for files not
+ * opened for reading), otherwise the error from pidlist_array_load()
+ * or seq_open().
+ */
+static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
{
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
- struct pid_namespace *ns = current->nsproxy->pid_ns;
- struct cgroup_pids *cp;
- pid_t *pidarray;
- int npids;
+ struct cgroup_pidlist *l;
int retval;
/* Nothing to do for write-only files */
if (!(file->f_mode & FMODE_READ))
return 0;
- /*
- * If cgroup gets more users after we read count, we won't have
- * enough space - tough. This race is indistinguishable to the
- * caller from the case that the additional cgroup users didn't
- * show up until sometime later on.
- */
- npids = cgroup_task_count(cgrp);
- pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
- if (!pidarray)
- return -ENOMEM;
- npids = pid_array_load(pidarray, npids, cgrp);
- sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
- /*
- * Store the array in the cgroup, freeing the old
- * array if necessary
- */
- down_write(&cgrp->pids_mutex);
-
- list_for_each_entry(cp, &cgrp->pids_list, list) {
- if (ns == cp->ns)
- goto found;
- }
-
- cp = kzalloc(sizeof(*cp), GFP_KERNEL);
- if (!cp) {
- up_write(&cgrp->pids_mutex);
- kfree(pidarray);
- return -ENOMEM;
- }
- cp->cgrp = cgrp;
- cp->ns = ns;
- get_pid_ns(ns);
- list_add(&cp->list, &cgrp->pids_list);
-found:
- kfree(cp->tasks_pids);
- cp->tasks_pids = pidarray;
- cp->length = npids;
- cp->use_count++;
- up_write(&cgrp->pids_mutex);
-
- file->f_op = &cgroup_tasks_operations;
+ /* have the array populated; on success we hold a reference on l */
+ retval = pidlist_array_load(cgrp, type, &l);
+ if (retval)
+ return retval;
+ /* configure file information */
+ file->f_op = &cgroup_pidlist_operations;
- retval = seq_open(file, &cgroup_tasks_seq_operations);
+ retval = seq_open(file, &cgroup_pidlist_seq_operations);
if (retval) {
- release_cgroup_pid_array(cp);
+ cgroup_release_pid_array(l); /* give back the reference taken above */
return retval;
}
- ((struct seq_file *)file->private_data)->private = cp;
+ ((struct seq_file *)file->private_data)->private = l;
return 0;
}
+/* open() handler for the "tasks" file: a pidlist of type CGROUP_FILE_TASKS */
+static int cgroup_tasks_open(struct inode *unused, struct file *file)
+{
+	const enum cgroup_filetype type = CGROUP_FILE_TASKS;
+
+	return cgroup_pidlist_open(file, type);
+}
+/* open() handler for the "procs" file: a pidlist of type CGROUP_FILE_PROCS */
+static int cgroup_procs_open(struct inode *unused, struct file *file)
+{
+	const enum cgroup_filetype type = CGROUP_FILE_PROCS;
+
+	return cgroup_pidlist_open(file, type);
+}
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
struct cftype *cft)
@@ -2449,21 +2794,27 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
/*
* for the common functions, 'private' gives the type of file
*/
+/*
+ * prefix for generic cgroup control files; for historical reasons we
+ * can't put this on the older files ("tasks", "notify_on_release")
+ */
+#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
static struct cftype files[] = {
{
.name = "tasks",
.open = cgroup_tasks_open,
.write_u64 = cgroup_tasks_write,
- .release = cgroup_tasks_release,
- .private = FILE_TASKLIST,
+ .release = cgroup_pidlist_release,
.mode = S_IRUGO | S_IWUSR,
},
-
+ {
+ .name = CGROUP_FILE_GENERIC_PREFIX "procs",
+ .open = cgroup_procs_open,
+ /* .write_u64 = cgroup_procs_write, TODO */
+ .release = cgroup_pidlist_release,
+ .mode = S_IRUGO, /* no write handler yet, so no write permission */
+ },
{
.name = "notify_on_release",
.read_u64 = cgroup_read_notify_on_release,
.write_u64 = cgroup_write_notify_on_release,
- .private = FILE_NOTIFY_ON_RELEASE,
},
};
@@ -2472,7 +2823,6 @@ static struct cftype cft_release_agent = {
.read_seq_string = cgroup_release_agent_show,
.write_string = cgroup_release_agent_write,
.max_write_len = PATH_MAX,
- .private = FILE_RELEASE_AGENT,
};
static int cgroup_populate_dir(struct cgroup *cgrp)
@@ -2879,6 +3229,7 @@ int __init cgroup_init_early(void)
init_task.cgroups = &init_css_set;
init_css_set_link.cg = &init_css_set;
+ init_css_set_link.cgrp = dummytop;
list_add(&init_css_set_link.cgrp_link_list,
&rootnode.top_cgroup.css_sets);
list_add(&init_css_set_link.cg_link_list,
@@ -2933,7 +3284,7 @@ int __init cgroup_init(void)
/* Add init_css_set to the hash table */
hhead = css_set_hash(init_css_set.subsys);
hlist_add_head(&init_css_set.hlist, hhead);
-
+ BUG_ON(!init_root_id(&rootnode));
err = register_filesystem(&cgroup_fs_type);
if (err < 0)
goto out;
@@ -2986,15 +3337,16 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
for_each_active_root(root) {
struct cgroup_subsys *ss;
struct cgroup *cgrp;
- int subsys_id;
int count = 0;
- seq_printf(m, "%lu:", root->subsys_bits);
+ seq_printf(m, "%d:", root->hierarchy_id);
for_each_subsys(root, ss)
seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
+ if (strlen(root->name))
+ seq_printf(m, "%sname=%s", count ? "," : "",
+ root->name);
seq_putc(m, ':');
- get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
- cgrp = task_cgroup(tsk, subsys_id);
+ cgrp = task_cgroup_from_root(tsk, root);
retval = cgroup_path(cgrp, buf, PAGE_SIZE);
if (retval < 0)
goto out_unlock;
@@ -3033,8 +3385,8 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
mutex_lock(&cgroup_mutex);
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
- seq_printf(m, "%s\t%lu\t%d\t%d\n",
- ss->name, ss->root->subsys_bits,
+ seq_printf(m, "%s\t%d\t%d\t%d\n",
+ ss->name, ss->root->hierarchy_id,
ss->root->number_of_cgroups, !ss->disabled);
}
mutex_unlock(&cgroup_mutex);
@@ -3320,13 +3672,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
{
int ret;
struct cgroup *target;
- int subsys_id;
if (cgrp == dummytop)
return 1;
- get_first_subsys(cgrp, NULL, &subsys_id);
- target = task_cgroup(task, subsys_id);
+ target = task_cgroup_from_root(task, cgrp->root);
while (cgrp != target && cgrp!= cgrp->top_cgroup)
cgrp = cgrp->parent;
ret = (cgrp == target);
@@ -3693,3 +4043,154 @@ css_get_next(struct cgroup_subsys *ss, int id,
return ret;
}
+#ifdef CONFIG_CGROUP_DEBUG
+/*
+ * ->create callback of the debug subsystem: hand back a zeroed
+ * cgroup_subsys_state, or ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
+						   struct cgroup *cont)
+{
+	struct cgroup_subsys_state *state;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (state)
+		return state;
+
+	return ERR_PTR(-ENOMEM);
+}
+
+/* ->destroy callback: free the state stored in the cgroup's debug slot */
+static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct cgroup_subsys_state *state = cont->subsys[debug_subsys_id];
+
+	kfree(state);
+}
+
+/* "cgroup_refcount": current value of the cgroup's atomic 'count' */
+static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
+{
+	const int refs = atomic_read(&cont->count);
+
+	return refs;
+}
+
+/* "taskcount": whatever cgroup_task_count() reports for this cgroup */
+static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
+{
+	const int nr_tasks = cgroup_task_count(cont);
+
+	return nr_tasks;
+}
+
+/* "current_css_set": address of the reading task's css_set, as a number */
+static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
+{
+	const unsigned long addr = (unsigned long)current->cgroups;
+
+	return (u64)addr;
+}
+
+/*
+ * "current_css_set_refcount": reference count of the reading task's
+ * css_set, sampled inside an RCU read-side section.
+ */
+static u64 current_css_set_refcount_read(struct cgroup *cont,
+					 struct cftype *cft)
+{
+	u64 refs;
+
+	rcu_read_lock();
+	refs = atomic_read(&current->cgroups->refcount);
+	rcu_read_unlock();
+
+	return refs;
+}
+
+/*
+ * "current_css_set_cg_links": for every cgroup linked to the reading
+ * task's css_set, print its hierarchy id and directory name ("?" when
+ * the cgroup has no dentry). Always returns 0.
+ */
+static int current_css_set_cg_links_read(struct cgroup *cont,
+					 struct cftype *cft,
+					 struct seq_file *seq)
+{
+	struct css_set *cg;
+	struct cg_cgroup_link *link;
+
+	read_lock(&css_set_lock);
+	rcu_read_lock();
+	cg = rcu_dereference(current->cgroups);
+	list_for_each_entry(link, &cg->cg_links, cg_link_list) {
+		struct cgroup *c = link->cgrp;
+		const char *name = "?";
+
+		if (c->dentry)
+			name = c->dentry->d_name.name;
+		seq_printf(seq, "Root %d group %s\n",
+			   c->root->hierarchy_id, name);
+	}
+	rcu_read_unlock();
+	read_unlock(&css_set_lock);
+
+	return 0;
+}
+
+#define MAX_TASKS_SHOWN_PER_CSS 25
+/*
+ * "cgroup_css_links": list every css_set linked to this cgroup and, under
+ * each, at most MAX_TASKS_SHOWN_PER_CSS of its tasks followed by "..." if
+ * there are more. Runs under css_set_lock held for reading. Always
+ * returns 0.
+ */
+static int cgroup_css_links_read(struct cgroup *cont,
+				 struct cftype *cft,
+				 struct seq_file *seq)
+{
+	struct cg_cgroup_link *link;
+
+	read_lock(&css_set_lock);
+	list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
+		struct css_set *cg = link->cg;
+		struct task_struct *task;
+		int count = 0;
+
+		seq_printf(seq, "css_set %p\n", cg);
+		list_for_each_entry(task, &cg->tasks, cg_list) {
+			/* >= so that exactly the advertised maximum is shown */
+			if (count++ >= MAX_TASKS_SHOWN_PER_CSS) {
+				seq_puts(seq, " ...\n");
+				break;
+			}
+			seq_printf(seq, " task %d\n", task_pid_vnr(task));
+		}
+	}
+	read_unlock(&css_set_lock);
+	return 0;
+}
+
+/* "releasable": state of the CGRP_RELEASABLE bit in the cgroup's flags */
+static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
+{
+	const int releasable = test_bit(CGRP_RELEASABLE, &cgrp->flags);
+
+	return releasable;
+}
+
+/*
+ * Control files of the debug subsystem; all of them are read-only views
+ * (only read handlers are provided). Registered per cgroup by
+ * debug_populate().
+ */
+static struct cftype debug_files[] = {
+ {
+ .name = "cgroup_refcount",
+ .read_u64 = cgroup_refcount_read,
+ },
+ {
+ .name = "taskcount",
+ .read_u64 = debug_taskcount_read,
+ },
+
+ {
+ .name = "current_css_set",
+ .read_u64 = current_css_set_read,
+ },
+
+ {
+ .name = "current_css_set_refcount",
+ .read_u64 = current_css_set_refcount_read,
+ },
+
+ {
+ .name = "current_css_set_cg_links",
+ .read_seq_string = current_css_set_cg_links_read,
+ },
+
+ {
+ .name = "cgroup_css_links",
+ .read_seq_string = cgroup_css_links_read,
+ },
+
+ {
+ .name = "releasable",
+ .read_u64 = releasable_read,
+ },
+};
+
+/* ->populate callback: add every debug_files[] entry to the cgroup */
+static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	const int nr_files = ARRAY_SIZE(debug_files);
+
+	return cgroup_add_files(cont, ss, debug_files, nr_files);
+}
+
+/* the "debug" cgroup subsystem: wires up the callbacks defined above */
+struct cgroup_subsys debug_subsys = {
+ .name = "debug",
+ .create = debug_create,
+ .destroy = debug_destroy,
+ .populate = debug_populate,
+ .subsys_id = debug_subsys_id,
+};
+#endif /* CONFIG_CGROUP_DEBUG */
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
deleted file mode 100644
index 0c92d797baa..00000000000
--- a/kernel/cgroup_debug.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * kernel/cgroup_debug.c - Example cgroup subsystem that
- * exposes debug info
- *
- * Copyright (C) Google Inc, 2007
- *
- * Developed by Paul Menage (menage@google.com)
- *
- */
-
-#include <linux/cgroup.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/rcupdate.h>
-
-#include <asm/atomic.h>
-
-static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
- struct cgroup *cont)
-{
- struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
-
- if (!css)
- return ERR_PTR(-ENOMEM);
-
- return css;
-}
-
-static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
-{
- kfree(cont->subsys[debug_subsys_id]);
-}
-
-static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
-{
- return atomic_read(&cont->count);
-}
-
-static u64 taskcount_read(struct cgroup *cont, struct cftype *cft)
-{
- u64 count;
-
- count = cgroup_task_count(cont);
- return count;
-}
-
-static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
-{
- return (u64)(long)current->cgroups;
-}
-
-static u64 current_css_set_refcount_read(struct cgroup *cont,
- struct cftype *cft)
-{
- u64 count;
-
- rcu_read_lock();
- count = atomic_read(&current->cgroups->refcount);
- rcu_read_unlock();
- return count;
-}
-
-static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
-{
- return test_bit(CGRP_RELEASABLE, &cgrp->flags);
-}
-
-static struct cftype files[] = {
- {
- .name = "cgroup_refcount",
- .read_u64 = cgroup_refcount_read,
- },
- {
- .name = "taskcount",
- .read_u64 = taskcount_read,
- },
-
- {
- .name = "current_css_set",
- .read_u64 = current_css_set_read,
- },
-
- {
- .name = "current_css_set_refcount",
- .read_u64 = current_css_set_refcount_read,
- },
-
- {
- .name = "releasable",
- .read_u64 = releasable_read,
- },
-};
-
-static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
-{
- return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
-}
-
-struct cgroup_subsys debug_subsys = {
- .name = "debug",
- .create = debug_create,
- .destroy = debug_destroy,
- .populate = debug_populate,
- .subsys_id = debug_subsys_id,
-};
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index fb249e2bcad..59e9ef6aab4 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -159,7 +159,7 @@ static bool is_task_frozen_enough(struct task_struct *task)
*/
static int freezer_can_attach(struct cgroup_subsys *ss,
struct cgroup *new_cgroup,
- struct task_struct *task)
+ struct task_struct *task, bool threadgroup)
{
struct freezer *freezer;
@@ -177,6 +177,19 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
if (freezer->state == CGROUP_FROZEN)
return -EBUSY;
+ if (threadgroup) {
+ struct task_struct *c;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+ if (is_task_frozen_enough(c)) {
+ rcu_read_unlock();
+ return -EBUSY;
+ }
+ }
+ rcu_read_unlock();
+ }
+
return 0;
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 7e75a41bd50..b5cb469d254 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1324,9 +1324,10 @@ static int fmeter_getrate(struct fmeter *fmp)
static cpumask_var_t cpus_attach;
/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys *ss,
- struct cgroup *cont, struct task_struct *tsk)
+static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+ struct task_struct *tsk, bool threadgroup)
{
+ int ret;
struct cpuset *cs = cgroup_cs(cont);
if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1343,18 +1344,51 @@ static int cpuset_can_attach(struct cgroup_subsys *ss,
if (tsk->flags & PF_THREAD_BOUND)
return -EINVAL;
- return security_task_setscheduler(tsk, 0, NULL);
+ ret = security_task_setscheduler(tsk, 0, NULL);
+ if (ret)
+ return ret;
+ if (threadgroup) {
+ struct task_struct *c;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+ ret = security_task_setscheduler(c, 0, NULL);
+ if (ret) {
+ rcu_read_unlock();
+ return ret;
+ }
+ }
+ rcu_read_unlock();
+ }
+ return 0;
+}
+
+/*
+ * Per-task part of a cpuset attach: apply the cpus_attach mask to 'tsk',
+ * switch its nodemask to 'to' under task_lock() and refresh its spread
+ * flags from 'cs'.
+ */
+static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
+			       struct cpuset *cs)
+{
+	int ret;
+
+	/*
+	 * can_attach beforehand should guarantee that this doesn't fail.
+	 * TODO: have a better way to handle failure here
+	 */
+	ret = set_cpus_allowed_ptr(tsk, cpus_attach);
+	WARN_ON_ONCE(ret);
+
+	task_lock(tsk);
+	cpuset_change_task_nodemask(tsk, to);
+	task_unlock(tsk);
+
+	cpuset_update_task_spread_flag(cs, tsk);
}
-static void cpuset_attach(struct cgroup_subsys *ss,
- struct cgroup *cont, struct cgroup *oldcont,
- struct task_struct *tsk)
+static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+ struct cgroup *oldcont, struct task_struct *tsk,
+ bool threadgroup)
{
nodemask_t from, to;
struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont);
struct cpuset *oldcs = cgroup_cs(oldcont);
- int err;
if (cs == &top_cpuset) {
cpumask_copy(cpus_attach, cpu_possible_mask);
@@ -1363,15 +1397,19 @@ static void cpuset_attach(struct cgroup_subsys *ss,
guarantee_online_cpus(cs, cpus_attach);
guarantee_online_mems(cs, &to);
}
- err = set_cpus_allowed_ptr(tsk, cpus_attach);
- if (err)
- return;
- task_lock(tsk);
- cpuset_change_task_nodemask(tsk, &to);
- task_unlock(tsk);
- cpuset_update_task_spread_flag(cs, tsk);
+ /* do per-task migration stuff possibly for each in the threadgroup */
+ cpuset_attach_task(tsk, &to, cs);
+ if (threadgroup) {
+ struct task_struct *c;
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+ cpuset_attach_task(c, &to, cs);
+ }
+ rcu_read_unlock();
+ }
+ /* change mm; only needs to be done once even if threadgroup */
from = oldcs->mems_allowed;
to = cs->mems_allowed;
mm = get_task_mm(tsk);
diff --git a/kernel/cred.c b/kernel/cred.c
index d7f7a01082e..dd76cfe5f5b 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -782,6 +782,25 @@ EXPORT_SYMBOL(set_create_files_as);
#ifdef CONFIG_DEBUG_CREDENTIALS
+/*
+ * Sanity-check a set of credentials. Returns true if they look corrupt:
+ * wrong magic number, usage count below the subscriber count, or (when
+ * SELinux is enabled) a security pointer that lies below PAGE_SIZE or
+ * points at memory whose upper three bytes carry the POISON_FREE pattern.
+ */
+bool creds_are_invalid(const struct cred *cred)
+{
+	if (cred->magic != CRED_MAGIC ||
+	    atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
+		return true;
+#ifdef CONFIG_SECURITY_SELINUX
+	if (selinux_is_enabled()) {
+		const u32 freed = POISON_FREE << 24 |
+				  POISON_FREE << 16 |
+				  POISON_FREE << 8;
+
+		if ((unsigned long) cred->security < PAGE_SIZE)
+			return true;
+		if ((*(u32 *)cred->security & 0xffffff00) == freed)
+			return true;
+	}
+#endif
+	return false;
+}
+EXPORT_SYMBOL(creds_are_invalid);
+
/*
* dump invalid credentials
*/
diff --git a/kernel/exit.c b/kernel/exit.c
index 60d6fdcc926..5859f598c95 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -976,8 +976,6 @@ NORET_TYPE void do_exit(long code)
disassociate_ctty(1);
module_put(task_thread_info(tsk)->exec_domain->module);
- if (tsk->binfmt)
- module_put(tsk->binfmt->module);
proc_exit_connector(tsk);
@@ -1097,28 +1095,28 @@ struct wait_opts {
int __user *wo_stat;
struct rusage __user *wo_rusage;
+ wait_queue_t child_wait;
int notask_error;
};
-static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
+/*
+ * Return the struct pid of the given type for 'task'. For anything other
+ * than PIDTYPE_PID the pid is read from the task's group leader.
+ * NOTE(review): 'type' indexes pids[] without a range check, so callers
+ * must not pass PIDTYPE_MAX - eligible_pid() tests for it before calling.
+ */
+static inline
+struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
{
- struct pid *pid = NULL;
- if (type == PIDTYPE_PID)
- pid = task->pids[type].pid;
- else if (type < PIDTYPE_MAX)
- pid = task->group_leader->pids[type].pid;
- return pid;
+ if (type != PIDTYPE_PID)
+ task = task->group_leader;
+ return task->pids[type].pid;
}
-static int eligible_child(struct wait_opts *wo, struct task_struct *p)
+static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
{
- int err;
-
- if (wo->wo_type < PIDTYPE_MAX) {
- if (task_pid_type(p, wo->wo_type) != wo->wo_pid)
- return 0;
- }
+ return wo->wo_type == PIDTYPE_MAX ||
+ task_pid_type(p, wo->wo_type) == wo->wo_pid;
+}
+static int eligible_child(struct wait_opts *wo, struct task_struct *p)
+{
+ if (!eligible_pid(wo, p))
+ return 0;
/* Wait for all children (clone and not) if __WALL is set;
* otherwise, wait for clone children *only* if __WCLONE is
* set; otherwise, wait for non-clone children *only*. (Note:
@@ -1128,10 +1126,6 @@ static int eligible_child(struct wait_opts *wo, struct task_struct *p)
&& !(wo->wo_flags & __WALL))
return 0;
- err = security_task_wait(p);
- if (err)
- return err;
-
return 1;
}
@@ -1144,18 +1138,20 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
put_task_struct(p);
infop = wo->wo_info;
- if (!retval)
- retval = put_user(SIGCHLD, &infop->si_signo);
- if (!retval)
- retval = put_user(0, &infop->si_errno);
- if (!retval)
- retval = put_user((short)why, &infop->si_code);
- if (!retval)
- retval = put_user(pid, &infop->si_pid);
- if (!retval)
- retval = put_user(uid, &infop->si_uid);
- if (!retval)
- retval = put_user(status, &infop->si_status);
+ if (infop) {
+ if (!retval)
+ retval = put_user(SIGCHLD, &infop->si_signo);
+ if (!retval)
+ retval = put_user(0, &infop->si_errno);
+ if (!retval)
+ retval = put_user((short)why, &infop->si_code);
+ if (!retval)
+ retval = put_user(pid, &infop->si_pid);
+ if (!retval)
+ retval = put_user(uid, &infop->si_uid);
+ if (!retval)
+ retval = put_user(status, &infop->si_status);
+ }
if (!retval)
retval = pid;
return retval;
@@ -1485,13 +1481,14 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
* then ->notask_error is 0 if @p is an eligible child,
* or another error from security_task_wait(), or still -ECHILD.
*/
-static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent,
- int ptrace, struct task_struct *p)
+static int wait_consider_task(struct wait_opts *wo, int ptrace,
+ struct task_struct *p)
{
int ret = eligible_child(wo, p);
if (!ret)
return ret;
+ ret = security_task_wait(p);
if (unlikely(ret < 0)) {
/*
* If we have not yet seen any eligible child,
@@ -1553,7 +1550,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
* Do not consider detached threads.
*/
if (!task_detached(p)) {
- int ret = wait_consider_task(wo, tsk, 0, p);
+ int ret = wait_consider_task(wo, 0, p);
if (ret)
return ret;
}
@@ -1567,7 +1564,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
struct task_struct *p;
list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
- int ret = wait_consider_task(wo, tsk, 1, p);
+ int ret = wait_consider_task(wo, 1, p);
if (ret)
return ret;
}
@@ -1575,15 +1572,38 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
return 0;
}
+static int child_wait_callback(wait_queue_t *wait, unsigned mode,
+ int sync, void *key)
+{
+ struct wait_opts *wo = container_of(wait, struct wait_opts,
+ child_wait);
+ struct task_struct *p = key;
+
+ if (!eligible_pid(wo, p))
+ return 0;
+
+ if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
+ return 0;
+
+ return default_wake_function(wait, mode, sync, key);
+}
+
+void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
+{
+ __wake_up_sync_key(&parent->signal->wait_chldexit,
+ TASK_INTERRUPTIBLE, 1, p);
+}
+
static long do_wait(struct wait_opts *wo)
{
- DECLARE_WAITQUEUE(wait, current);
struct task_struct *tsk;
int retval;
trace_sched_process_wait(wo->wo_pid);
- add_wait_queue(&current->signal->wait_chldexit,&wait);
+ init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
+ wo->child_wait.private = current;
+ add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
repeat:
/*
* If there is nothing that can match our critiera just get out.
@@ -1624,32 +1644,7 @@ notask:
}
end:
__set_current_state(TASK_RUNNING);
- remove_wait_queue(&current->signal->wait_chldexit,&wait);
- if (wo->wo_info) {
- struct siginfo __user *infop = wo->wo_info;
-
- if (retval > 0)
- retval = 0;
- else {
- /*
- * For a WNOHANG return, clear out all the fields
- * we would set so the user can easily tell the
- * difference.
- */
- if (!retval)
- retval = put_user(0, &infop->si_signo);
- if (!retval)
- retval = put_user(0, &infop->si_errno);
- if (!retval)
- retval = put_user(0, &infop->si_code);
- if (!retval)
- retval = put_user(0, &infop->si_pid);
- if (!retval)
- retval = put_user(0, &infop->si_uid);
- if (!retval)
- retval = put_user(0, &infop->si_status);
- }
- }
+ remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
return retval;
}
@@ -1694,6 +1689,29 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
wo.wo_stat = NULL;
wo.wo_rusage = ru;
ret = do_wait(&wo);
+
+ if (ret > 0) {
+ ret = 0;
+ } else if (infop) {
+ /*
+ * For a WNOHANG return, clear out all the fields
+ * we would set so the user can easily tell the
+ * difference.
+ */
+ if (!ret)
+ ret = put_user(0, &infop->si_signo);
+ if (!ret)
+ ret = put_user(0, &infop->si_errno);
+ if (!ret)
+ ret = put_user(0, &infop->si_code);
+ if (!ret)
+ ret = put_user(0, &infop->si_pid);
+ if (!ret)
+ ret = put_user(0, &infop->si_uid);
+ if (!ret)
+ ret = put_user(0, &infop->si_status);
+ }
+
put_pid(pid);
/* avoid REGPARM breakage on x86: */
diff --git a/kernel/fork.c b/kernel/fork.c
index 51ad0b0b726..266c6af6ef1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -434,6 +434,14 @@ __setup("coredump_filter=", coredump_filter_setup);
#include <linux/init_task.h>
+static void mm_init_aio(struct mm_struct *mm)
+{
+#ifdef CONFIG_AIO
+ spin_lock_init(&mm->ioctx_lock);
+ INIT_HLIST_HEAD(&mm->ioctx_list);
+#endif
+}
+
static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
{
atomic_set(&mm->mm_users, 1);
@@ -447,10 +455,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
set_mm_counter(mm, file_rss, 0);
set_mm_counter(mm, anon_rss, 0);
spin_lock_init(&mm->page_table_lock);
- spin_lock_init(&mm->ioctx_lock);
- INIT_HLIST_HEAD(&mm->ioctx_list);
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
+ mm_init_aio(mm);
mm_init_owner(mm, p);
if (likely(!mm_alloc_pgd(mm))) {
@@ -511,6 +518,8 @@ void mmput(struct mm_struct *mm)
spin_unlock(&mmlist_lock);
}
put_swap_token(mm);
+ if (mm->binfmt)
+ module_put(mm->binfmt->module);
mmdrop(mm);
}
}
@@ -636,9 +645,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
mm->hiwater_rss = get_mm_rss(mm);
mm->hiwater_vm = mm->total_vm;
+ if (mm->binfmt && !try_module_get(mm->binfmt->module))
+ goto free_pt;
+
return mm;
free_pt:
+ /* don't put binfmt in mmput(): we never got the module reference */
+ mm->binfmt = NULL;
mmput(mm);
fail_nomem:
@@ -979,6 +993,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
return ERR_PTR(-EINVAL);
+ /*
+ * Siblings of global init remain as zombies on exit since they are
+ * not reaped by their parent (swapper). To solve this and to avoid
+ * multi-rooted process trees, prevent global and container-inits
+ * from creating siblings.
+ */
+ if ((clone_flags & CLONE_PARENT) &&
+ current->signal->flags & SIGNAL_UNKILLABLE)
+ return ERR_PTR(-EINVAL);
+
retval = security_task_create(clone_flags);
if (retval)
goto fork_out;
@@ -1020,9 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (!try_module_get(task_thread_info(p)->exec_domain->module))
goto bad_fork_cleanup_count;
- if (p->binfmt && !try_module_get(p->binfmt->module))
- goto bad_fork_cleanup_put_domain;
-
p->did_exec = 0;
delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
copy_flags(clone_flags, p);
@@ -1310,9 +1331,6 @@ bad_fork_cleanup_cgroup:
#endif
cgroup_exit(p, cgroup_callbacks_done);
delayacct_tsk_free(p);
- if (p->binfmt)
- module_put(p->binfmt->module);
-bad_fork_cleanup_put_domain:
module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 654efd09f6a..70a298d6da7 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -34,7 +34,7 @@ config GCOV_KERNEL
config GCOV_PROFILE_ALL
bool "Profile entire Kernel"
depends on GCOV_KERNEL
- depends on S390 || X86 || (PPC && EXPERIMENTAL)
+ depends on S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
default n
---help---
This options activates profiling for the entire kernel.
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 022a4927b78..d4e84174740 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -171,12 +171,12 @@ static unsigned long timeout_jiffies(unsigned long timeout)
* Process updating of timeout sysctl
*/
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int ret;
- ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
goto out;
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 689d20f3930..9fcb53a11f8 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -143,7 +143,6 @@ struct subprocess_info {
static int ____call_usermodehelper(void *data)
{
struct subprocess_info *sub_info = data;
- enum umh_wait wait = sub_info->wait;
int retval;
BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
@@ -185,14 +184,10 @@ static int ____call_usermodehelper(void *data)
*/
set_user_nice(current, 0);
- if (wait == UMH_WAIT_EXEC)
- complete(sub_info->complete);
-
retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp);
/* Exec failed? */
- if (wait != UMH_WAIT_EXEC)
- sub_info->retval = retval;
+ sub_info->retval = retval;
do_exit(0);
}
@@ -271,14 +266,16 @@ static void __call_usermodehelper(struct work_struct *work)
switch (wait) {
case UMH_NO_WAIT:
- case UMH_WAIT_EXEC:
break;
case UMH_WAIT_PROC:
if (pid > 0)
break;
sub_info->retval = pid;
- break;
+ /* FALLTHROUGH */
+
+ case UMH_WAIT_EXEC:
+ complete(sub_info->complete);
}
}
diff --git a/kernel/module.c b/kernel/module.c
index e6bc4b28aa6..5a29397ca4b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1797,6 +1797,17 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs,
}
}
+static void free_modinfo(struct module *mod)
+{
+ struct module_attribute *attr;
+ int i;
+
+ for (i = 0; (attr = modinfo_attrs[i]); i++) {
+ if (attr->free)
+ attr->free(mod);
+ }
+}
+
#ifdef CONFIG_KALLSYMS
/* lookup symbol in given range of kernel_symbols */
@@ -1862,13 +1873,93 @@ static char elf_type(const Elf_Sym *sym,
return '?';
}
+static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
+ unsigned int shnum)
+{
+ const Elf_Shdr *sec;
+
+ if (src->st_shndx == SHN_UNDEF
+ || src->st_shndx >= shnum
+ || !src->st_name)
+ return false;
+
+ sec = sechdrs + src->st_shndx;
+ if (!(sec->sh_flags & SHF_ALLOC)
+#ifndef CONFIG_KALLSYMS_ALL
+ || !(sec->sh_flags & SHF_EXECINSTR)
+#endif
+ || (sec->sh_entsize & INIT_OFFSET_MASK))
+ return false;
+
+ return true;
+}
+
+static unsigned long layout_symtab(struct module *mod,
+ Elf_Shdr *sechdrs,
+ unsigned int symindex,
+ unsigned int strindex,
+ const Elf_Ehdr *hdr,
+ const char *secstrings,
+ unsigned long *pstroffs,
+ unsigned long *strmap)
+{
+ unsigned long symoffs;
+ Elf_Shdr *symsect = sechdrs + symindex;
+ Elf_Shdr *strsect = sechdrs + strindex;
+ const Elf_Sym *src;
+ const char *strtab;
+ unsigned int i, nsrc, ndst;
+
+ /* Put symbol section at end of init part of module. */
+ symsect->sh_flags |= SHF_ALLOC;
+ symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
+ symindex) | INIT_OFFSET_MASK;
+ DEBUGP("\t%s\n", secstrings + symsect->sh_name);
+
+ src = (void *)hdr + symsect->sh_offset;
+ nsrc = symsect->sh_size / sizeof(*src);
+ strtab = (void *)hdr + strsect->sh_offset;
+ for (ndst = i = 1; i < nsrc; ++i, ++src)
+ if (is_core_symbol(src, sechdrs, hdr->e_shnum)) {
+ unsigned int j = src->st_name;
+
+ while(!__test_and_set_bit(j, strmap) && strtab[j])
+ ++j;
+ ++ndst;
+ }
+
+ /* Append room for core symbols at end of core part. */
+ symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
+ mod->core_size = symoffs + ndst * sizeof(Elf_Sym);
+
+ /* Put string table section at end of init part of module. */
+ strsect->sh_flags |= SHF_ALLOC;
+ strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
+ strindex) | INIT_OFFSET_MASK;
+ DEBUGP("\t%s\n", secstrings + strsect->sh_name);
+
+ /* Append room for core symbols' strings at end of core part. */
+ *pstroffs = mod->core_size;
+ __set_bit(0, strmap);
+ mod->core_size += bitmap_weight(strmap, strsect->sh_size);
+
+ return symoffs;
+}
+
static void add_kallsyms(struct module *mod,
Elf_Shdr *sechdrs,
+ unsigned int shnum,
unsigned int symindex,
unsigned int strindex,
- const char *secstrings)
+ unsigned long symoffs,
+ unsigned long stroffs,
+ const char *secstrings,
+ unsigned long *strmap)
{
- unsigned int i;
+ unsigned int i, ndst;
+ const Elf_Sym *src;
+ Elf_Sym *dst;
+ char *s;
mod->symtab = (void *)sechdrs[symindex].sh_addr;
mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
@@ -1878,13 +1969,44 @@ static void add_kallsyms(struct module *mod,
for (i = 0; i < mod->num_symtab; i++)
mod->symtab[i].st_info
= elf_type(&mod->symtab[i], sechdrs, secstrings, mod);
+
+ mod->core_symtab = dst = mod->module_core + symoffs;
+ src = mod->symtab;
+ *dst = *src;
+ for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
+ if (!is_core_symbol(src, sechdrs, shnum))
+ continue;
+ dst[ndst] = *src;
+ dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name);
+ ++ndst;
+ }
+ mod->core_num_syms = ndst;
+
+ mod->core_strtab = s = mod->module_core + stroffs;
+ for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i)
+ if (test_bit(i, strmap))
+ *++s = mod->strtab[i];
}
#else
+static inline unsigned long layout_symtab(struct module *mod,
+ Elf_Shdr *sechdrs,
+ unsigned int symindex,
+ unsigned int strindex,
+ const Elf_Hdr *hdr,
+ const char *secstrings,
+ unsigned long *pstroffs,
+ unsigned long *strmap)
+{
+}
static inline void add_kallsyms(struct module *mod,
Elf_Shdr *sechdrs,
+ unsigned int shnum,
unsigned int symindex,
unsigned int strindex,
- const char *secstrings)
+ unsigned long symoffs,
+ unsigned long stroffs,
+ const char *secstrings,
+ const unsigned long *strmap)
{
}
#endif /* CONFIG_KALLSYMS */
@@ -1959,6 +2081,9 @@ static noinline struct module *load_module(void __user *umod,
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+#ifdef CONFIG_KALLSYMS
+ unsigned long symoffs, stroffs, *strmap;
+#endif
mm_segment_t old_fs;
DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2040,11 +2165,6 @@ static noinline struct module *load_module(void __user *umod,
/* Don't keep modinfo and version sections. */
sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
-#ifdef CONFIG_KALLSYMS
- /* Keep symbol and string tables for decoding later. */
- sechdrs[symindex].sh_flags |= SHF_ALLOC;
- sechdrs[strindex].sh_flags |= SHF_ALLOC;
-#endif
/* Check module struct version now, before we try to use module. */
if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -2080,6 +2200,13 @@ static noinline struct module *load_module(void __user *umod,
goto free_hdr;
}
+ strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size)
+ * sizeof(long), GFP_KERNEL);
+ if (!strmap) {
+ err = -ENOMEM;
+ goto free_mod;
+ }
+
if (find_module(mod->name)) {
err = -EEXIST;
goto free_mod;
@@ -2109,6 +2236,8 @@ static noinline struct module *load_module(void __user *umod,
this is done generically; there doesn't appear to be any
special cases for the architectures. */
layout_sections(mod, hdr, sechdrs, secstrings);
+ symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr,
+ secstrings, &stroffs, strmap);
/* Do the allocs. */
ptr = module_alloc_update_bounds(mod->core_size);
@@ -2313,7 +2442,10 @@ static noinline struct module *load_module(void __user *umod,
percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
sechdrs[pcpuindex].sh_size);
- add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+ add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex,
+ symoffs, stroffs, secstrings, strmap);
+ kfree(strmap);
+ strmap = NULL;
if (!mod->taints) {
struct _ddebug *debug;
@@ -2385,13 +2517,14 @@ static noinline struct module *load_module(void __user *umod,
synchronize_sched();
module_arch_cleanup(mod);
cleanup:
+ free_modinfo(mod);
kobject_del(&mod->mkobj.kobj);
kobject_put(&mod->mkobj.kobj);
free_unload:
module_unload_free(mod);
#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
- free_init:
percpu_modfree(mod->refptr);
+ free_init:
#endif
module_free(mod, mod->module_init);
free_core:
@@ -2402,6 +2535,7 @@ static noinline struct module *load_module(void __user *umod,
percpu_modfree(percpu);
free_mod:
kfree(args);
+ kfree(strmap);
free_hdr:
vfree(hdr);
return ERR_PTR(err);
@@ -2491,6 +2625,11 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
/* Drop initial reference. */
module_put(mod);
trim_init_extable(mod);
+#ifdef CONFIG_KALLSYMS
+ mod->num_symtab = mod->core_num_syms;
+ mod->symtab = mod->core_symtab;
+ mod->strtab = mod->core_strtab;
+#endif
module_free(mod, mod->module_init);
mod->module_init = NULL;
mod->init_size = 0;
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 5aa854f9e5a..2a5dfec8efe 100644
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -42,8 +42,8 @@ int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
* (hence either you are in the same cgroup as task, or in an
* ancestor cgroup thereof)
*/
-static int ns_can_attach(struct cgroup_subsys *ss,
- struct cgroup *new_cgroup, struct task_struct *task)
+static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup,
+ struct task_struct *task, bool threadgroup)
{
if (current != task) {
if (!capable(CAP_SYS_ADMIN))
@@ -56,6 +56,18 @@ static int ns_can_attach(struct cgroup_subsys *ss,
if (!cgroup_is_descendant(new_cgroup, task))
return -EPERM;
+ if (threadgroup) {
+ struct task_struct *c;
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+ if (!cgroup_is_descendant(new_cgroup, c)) {
+ rcu_read_unlock();
+ return -EPERM;
+ }
+ }
+ rcu_read_unlock();
+ }
+
return 0;
}
diff --git a/kernel/params.c b/kernel/params.c
index 7f6912ced2b..9da58eabdcb 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -23,6 +23,7 @@
#include <linux/device.h>
#include <linux/err.h>
#include <linux/slab.h>
+#include <linux/ctype.h>
#if 0
#define DEBUGP printk
@@ -87,7 +88,7 @@ static char *next_arg(char *args, char **param, char **val)
}
for (i = 0; args[i]; i++) {
- if (args[i] == ' ' && !in_quote)
+ if (isspace(args[i]) && !in_quote)
break;
if (equals == 0) {
if (args[i] == '=')
@@ -121,7 +122,7 @@ static char *next_arg(char *args, char **param, char **val)
next = args + i;
/* Chew up trailing spaces. */
- while (*next == ' ')
+ while (isspace(*next))
next++;
return next;
}
@@ -138,7 +139,7 @@ int parse_args(const char *name,
DEBUGP("Parsing ARGS: %s\n", args);
/* Chew leading spaces */
- while (*args == ' ')
+ while (isspace(*args))
args++;
while (*args) {
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 821722ae58a..86b3796b043 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -118,7 +118,7 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old
{
if (!(flags & CLONE_NEWPID))
return get_pid_ns(old_ns);
- if (flags & CLONE_THREAD)
+ if (flags & (CLONE_THREAD|CLONE_PARENT))
return ERR_PTR(-EINVAL);
return create_pid_namespace(old_ns);
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 8ba052c86d4..b101cdc4df3 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -13,7 +13,6 @@
#include <linux/module.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/genhd.h>
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 307c285af59..23bd09cd042 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -266,9 +266,10 @@ static int ignoring_children(struct sighand_struct *sigh)
* or self-reaping. Do notification now if it would have happened earlier.
* If it should reap itself, return true.
*
- * If it's our own child, there is no notification to do.
- * But if our normal children self-reap, then this child
- * was prevented by ptrace and we must reap it now.
+ * If it's our own child, there is no notification to do. But if our normal
+ * children self-reap, then this child was prevented by ptrace and we must
+ * reap it now; in that case we must also wake up sub-threads sleeping in
+ * do_wait().
*/
static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
{
@@ -278,8 +279,10 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
if (!task_detached(p) && thread_group_empty(p)) {
if (!same_thread_group(p->real_parent, tracer))
do_notify_parent(p, p->exit_signal);
- else if (ignoring_children(tracer->sighand))
+ else if (ignoring_children(tracer->sighand)) {
+ __wake_up_parent(p, tracer);
p->exit_signal = -1;
+ }
}
if (task_detached(p)) {
/* Mark it as in the process of being reaped. */
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index e1338f07431..88faec23e83 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -19,6 +19,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent)
{
spin_lock_init(&counter->lock);
counter->limit = RESOURCE_MAX;
+ counter->soft_limit = RESOURCE_MAX;
counter->parent = parent;
}
@@ -36,17 +37,27 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
}
int res_counter_charge(struct res_counter *counter, unsigned long val,
- struct res_counter **limit_fail_at)
+ struct res_counter **limit_fail_at,
+ struct res_counter **soft_limit_fail_at)
{
int ret;
unsigned long flags;
struct res_counter *c, *u;
*limit_fail_at = NULL;
+ if (soft_limit_fail_at)
+ *soft_limit_fail_at = NULL;
local_irq_save(flags);
for (c = counter; c != NULL; c = c->parent) {
spin_lock(&c->lock);
ret = res_counter_charge_locked(c, val);
+ /*
+ * With soft limits, we return the highest ancestor
+ * that exceeds its soft limit
+ */
+ if (soft_limit_fail_at &&
+ !res_counter_soft_limit_check_locked(c))
+ *soft_limit_fail_at = c;
spin_unlock(&c->lock);
if (ret < 0) {
*limit_fail_at = c;
@@ -74,7 +85,8 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
counter->usage -= val;
}
-void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+void res_counter_uncharge(struct res_counter *counter, unsigned long val,
+ bool *was_soft_limit_excess)
{
unsigned long flags;
struct res_counter *c;
@@ -82,6 +94,9 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val)
local_irq_save(flags);
for (c = counter; c != NULL; c = c->parent) {
spin_lock(&c->lock);
+ if (was_soft_limit_excess)
+ *was_soft_limit_excess =
+ !res_counter_soft_limit_check_locked(c);
res_counter_uncharge_locked(c, val);
spin_unlock(&c->lock);
}
@@ -101,6 +116,8 @@ res_counter_member(struct res_counter *counter, int member)
return &counter->limit;
case RES_FAILCNT:
return &counter->failcnt;
+ case RES_SOFT_LIMIT:
+ return &counter->soft_limit;
};
BUG();
diff --git a/kernel/sched.c b/kernel/sched.c
index 2f76e06bea5..ee61f454a98 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -10312,7 +10312,7 @@ static int sched_rt_global_constraints(void)
#endif /* CONFIG_RT_GROUP_SCHED */
int sched_rt_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
@@ -10323,7 +10323,7 @@ int sched_rt_handler(struct ctl_table *table, int write,
old_period = sysctl_sched_rt_period;
old_runtime = sysctl_sched_rt_runtime;
- ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
ret = sched_rt_global_constraints();
@@ -10377,8 +10377,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
}
static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct task_struct *tsk)
+cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
#ifdef CONFIG_RT_GROUP_SCHED
if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
@@ -10388,15 +10387,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
if (tsk->sched_class != &fair_sched_class)
return -EINVAL;
#endif
+ return 0;
+}
+static int
+cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct task_struct *tsk, bool threadgroup)
+{
+ int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
+ if (retval)
+ return retval;
+ if (threadgroup) {
+ struct task_struct *c;
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+ retval = cpu_cgroup_can_attach_task(cgrp, c);
+ if (retval) {
+ rcu_read_unlock();
+ return retval;
+ }
+ }
+ rcu_read_unlock();
+ }
return 0;
}
static void
cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cont, struct task_struct *tsk)
+ struct cgroup *old_cont, struct task_struct *tsk,
+ bool threadgroup)
{
sched_move_task(tsk);
+ if (threadgroup) {
+ struct task_struct *c;
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+ sched_move_task(c);
+ }
+ rcu_read_unlock();
+ }
}
#ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ecc637a0d59..4e777b47eed 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
#ifdef CONFIG_SCHED_DEBUG
int sched_nr_latency_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
- int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
return ret;
diff --git a/kernel/signal.c b/kernel/signal.c
index 64c5deeaca5..6705320784f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -705,7 +705,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
if (why) {
/*
- * The first thread which returns from finish_stop()
+ * The first thread which returns from do_signal_stop()
* will take ->siglock, notice SIGNAL_CLD_MASK, and
* notify its parent. See get_signal_to_deliver().
*/
@@ -971,6 +971,20 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
return send_signal(sig, info, t, 0);
}
+int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
+ bool group)
+{
+ unsigned long flags;
+ int ret = -ESRCH;
+
+ if (lock_task_sighand(p, &flags)) {
+ ret = send_signal(sig, info, p, group);
+ unlock_task_sighand(p, &flags);
+ }
+
+ return ret;
+}
+
/*
* Force a signal that the process can't ignore: if necessary
* we unblock the signal and change any SIG_IGN to SIG_DFL.
@@ -1036,12 +1050,6 @@ void zap_other_threads(struct task_struct *p)
}
}
-int __fatal_signal_pending(struct task_struct *tsk)
-{
- return sigismember(&tsk->pending.signal, SIGKILL);
-}
-EXPORT_SYMBOL(__fatal_signal_pending);
-
struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
{
struct sighand_struct *sighand;
@@ -1068,18 +1076,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long
*/
int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
- unsigned long flags;
- int ret;
+ int ret = check_kill_permission(sig, info, p);
- ret = check_kill_permission(sig, info, p);
-
- if (!ret && sig) {
- ret = -ESRCH;
- if (lock_task_sighand(p, &flags)) {
- ret = __group_send_sig_info(sig, info, p);
- unlock_task_sighand(p, &flags);
- }
- }
+ if (!ret && sig)
+ ret = do_send_sig_info(sig, info, p, true);
return ret;
}
@@ -1224,15 +1224,9 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
* These are for backward compatibility with the rest of the kernel source.
*/
-/*
- * The caller must ensure the task can't exit.
- */
int
send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
- int ret;
- unsigned long flags;
-
/*
* Make sure legacy kernel users don't send in bad values
* (normal paths check this in check_kill_permission).
@@ -1240,10 +1234,7 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
if (!valid_signal(sig))
return -EINVAL;
- spin_lock_irqsave(&p->sighand->siglock, flags);
- ret = specific_send_sig_info(sig, info, p);
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
- return ret;
+ return do_send_sig_info(sig, info, p, false);
}
#define __si_special(priv) \
@@ -1383,15 +1374,6 @@ ret:
}
/*
- * Wake up any threads in the parent blocked in wait* syscalls.
- */
-static inline void __wake_up_parent(struct task_struct *p,
- struct task_struct *parent)
-{
- wake_up_interruptible_sync(&parent->signal->wait_chldexit);
-}
-
-/*
* Let a parent know about the death of a child.
* For a stopped/continued status change, use do_notify_parent_cldstop instead.
*
@@ -1673,29 +1655,6 @@ void ptrace_notify(int exit_code)
spin_unlock_irq(&current->sighand->siglock);
}
-static void
-finish_stop(int stop_count)
-{
- /*
- * If there are no other threads in the group, or if there is
- * a group stop in progress and we are the last to stop,
- * report to the parent. When ptraced, every thread reports itself.
- */
- if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) {
- read_lock(&tasklist_lock);
- do_notify_parent_cldstop(current, CLD_STOPPED);
- read_unlock(&tasklist_lock);
- }
-
- do {
- schedule();
- } while (try_to_freeze());
- /*
- * Now we don't run again until continued.
- */
- current->exit_code = 0;
-}
-
/*
* This performs the stopping for SIGSTOP and other stop signals.
* We have to stop all threads in the thread group.
@@ -1705,15 +1664,9 @@ finish_stop(int stop_count)
static int do_signal_stop(int signr)
{
struct signal_struct *sig = current->signal;
- int stop_count;
+ int notify;
- if (sig->group_stop_count > 0) {
- /*
- * There is a group stop in progress. We don't need to
- * start another one.
- */
- stop_count = --sig->group_stop_count;
- } else {
+ if (!sig->group_stop_count) {
struct task_struct *t;
if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
@@ -1725,7 +1678,7 @@ static int do_signal_stop(int signr)
*/
sig->group_exit_code = signr;
- stop_count = 0;
+ sig->group_stop_count = 1;
for (t = next_thread(current); t != current; t = next_thread(t))
/*
* Setting state to TASK_STOPPED for a group
@@ -1734,19 +1687,44 @@ static int do_signal_stop(int signr)
*/
if (!(t->flags & PF_EXITING) &&
!task_is_stopped_or_traced(t)) {
- stop_count++;
+ sig->group_stop_count++;
signal_wake_up(t, 0);
}
- sig->group_stop_count = stop_count;
}
+ /*
+ * If there are no other threads in the group, or if there is
+ * a group stop in progress and we are the last to stop, report
+ * to the parent. When ptraced, every thread reports itself.
+ */
+ notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
+ notify = tracehook_notify_jctl(notify, CLD_STOPPED);
+ /*
+ * tracehook_notify_jctl() can drop and reacquire siglock, so
+ * we keep ->group_stop_count != 0 before the call. If SIGCONT
+ * or SIGKILL comes in between, ->group_stop_count == 0.
+ */
+ if (sig->group_stop_count) {
+ if (!--sig->group_stop_count)
+ sig->flags = SIGNAL_STOP_STOPPED;
+ current->exit_code = sig->group_exit_code;
+ __set_current_state(TASK_STOPPED);
+ }
+ spin_unlock_irq(&current->sighand->siglock);
- if (stop_count == 0)
- sig->flags = SIGNAL_STOP_STOPPED;
- current->exit_code = sig->group_exit_code;
- __set_current_state(TASK_STOPPED);
+ if (notify) {
+ read_lock(&tasklist_lock);
+ do_notify_parent_cldstop(current, notify);
+ read_unlock(&tasklist_lock);
+ }
+
+ /* Now we don't run again until woken by SIGCONT or SIGKILL */
+ do {
+ schedule();
+ } while (try_to_freeze());
+
+ tracehook_finish_jctl();
+ current->exit_code = 0;
- spin_unlock_irq(&current->sighand->siglock);
- finish_stop(stop_count);
return 1;
}
@@ -1815,14 +1793,15 @@ relock:
int why = (signal->flags & SIGNAL_STOP_CONTINUED)
? CLD_CONTINUED : CLD_STOPPED;
signal->flags &= ~SIGNAL_CLD_MASK;
- spin_unlock_irq(&sighand->siglock);
- if (unlikely(!tracehook_notify_jctl(1, why)))
- goto relock;
+ why = tracehook_notify_jctl(why, CLD_CONTINUED);
+ spin_unlock_irq(&sighand->siglock);
- read_lock(&tasklist_lock);
- do_notify_parent_cldstop(current->group_leader, why);
- read_unlock(&tasklist_lock);
+ if (why) {
+ read_lock(&tasklist_lock);
+ do_notify_parent_cldstop(current->group_leader, why);
+ read_unlock(&tasklist_lock);
+ }
goto relock;
}
@@ -1987,14 +1966,14 @@ void exit_signals(struct task_struct *tsk)
if (unlikely(tsk->signal->group_stop_count) &&
!--tsk->signal->group_stop_count) {
tsk->signal->flags = SIGNAL_STOP_STOPPED;
- group_stop = 1;
+ group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
}
out:
spin_unlock_irq(&tsk->sighand->siglock);
- if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) {
+ if (unlikely(group_stop)) {
read_lock(&tasklist_lock);
- do_notify_parent_cldstop(tsk, CLD_STOPPED);
+ do_notify_parent_cldstop(tsk, group_stop);
read_unlock(&tasklist_lock);
}
}
@@ -2290,7 +2269,6 @@ static int
do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
{
struct task_struct *p;
- unsigned long flags;
int error = -ESRCH;
rcu_read_lock();
@@ -2300,14 +2278,16 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
/*
* The null signal is a permissions and process existence
* probe. No signal is actually delivered.
- *
- * If lock_task_sighand() fails we pretend the task dies
- * after receiving the signal. The window is tiny, and the
- * signal is private anyway.
*/
- if (!error && sig && lock_task_sighand(p, &flags)) {
- error = specific_send_sig_info(sig, info, p);
- unlock_task_sighand(p, &flags);
+ if (!error && sig) {
+ error = do_send_sig_info(sig, info, p, false);
+ /*
+ * If lock_task_sighand() failed we pretend the task
+ * dies after receiving the signal. The window is tiny,
+ * and the signal is private anyway.
+ */
+ if (unlikely(error == -ESRCH))
+ error = 0;
}
}
rcu_read_unlock();
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 09d7519557d..0d31135efbf 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -26,10 +26,10 @@ static void slow_work_cull_timeout(unsigned long);
static void slow_work_oom_timeout(unsigned long);
#ifdef CONFIG_SYSCTL
-static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *,
+static int slow_work_min_threads_sysctl(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *,
+static int slow_work_max_threads_sysctl(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
#endif
@@ -493,10 +493,10 @@ static void slow_work_oom_timeout(unsigned long data)
* Handle adjustment of the minimum number of threads
*/
static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
- int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
int n;
if (ret == 0) {
@@ -521,10 +521,10 @@ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
* Handle adjustment of the maximum number of threads
*/
static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
- int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
int n;
if (ret == 0) {
diff --git a/kernel/smp.c b/kernel/smp.c
index fd47a256a24..c9d1c7835c2 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -347,13 +347,6 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
generic_exec_single(cpu, data, wait);
}
-/* Deprecated: shim for archs using old arch_send_call_function_ipi API. */
-
-#ifndef arch_send_call_function_ipi_mask
-# define arch_send_call_function_ipi_mask(maskp) \
- arch_send_call_function_ipi(*(maskp))
-#endif
-
/**
* smp_call_function_many(): Run a function on a set of other CPUs.
* @mask: The set of cpus to run on (only runs on online subset).
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 88796c33083..81324d12eb3 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -90,11 +90,11 @@ void touch_all_softlockup_watchdogs(void)
EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
touch_all_softlockup_watchdogs();
- return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
/*
diff --git a/kernel/sys.c b/kernel/sys.c
index ebcb1561172..255475d163e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1542,6 +1542,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
current->timer_slack_ns = arg2;
error = 0;
break;
+ case PR_MCE_KILL:
+ if (arg4 | arg5)
+ return -EINVAL;
+ switch (arg2) {
+ case 0:
+ if (arg3 != 0)
+ return -EINVAL;
+ current->flags &= ~PF_MCE_PROCESS;
+ break;
+ case 1:
+ current->flags |= PF_MCE_PROCESS;
+ if (arg3 != 0)
+ current->flags |= PF_MCE_EARLY;
+ else
+ current->flags &= ~PF_MCE_EARLY;
+ break;
+ default:
+ return -EINVAL;
+ }
+ error = 0;
+ break;
+
default:
error = -EINVAL;
break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 515bc230ac2..e06d0b8d195 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -49,6 +49,7 @@ cond_syscall(sys_sendmsg);
cond_syscall(compat_sys_sendmsg);
cond_syscall(sys_recvmsg);
cond_syscall(compat_sys_recvmsg);
+cond_syscall(compat_sys_recvfrom);
cond_syscall(sys_socketcall);
cond_syscall(sys_futex);
cond_syscall(compat_sys_futex);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0dfaa47d7cb..0d949c51741 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -26,7 +26,6 @@
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/ctype.h>
-#include <linux/utsname.h>
#include <linux/kmemcheck.h>
#include <linux/smp_lock.h>
#include <linux/fs.h>
@@ -77,6 +76,7 @@ extern int max_threads;
extern int core_uses_pid;
extern int suid_dumpable;
extern char core_pattern[];
+extern unsigned int core_pipe_limit;
extern int pid_max;
extern int min_free_kbytes;
extern int pid_max_min, pid_max_max;
@@ -163,9 +163,9 @@ extern int max_lock_depth;
#endif
#ifdef CONFIG_PROC_SYSCTL
-static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
+static int proc_do_cad_pid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-static int proc_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
@@ -424,6 +424,14 @@ static struct ctl_table kern_table[] = {
.proc_handler = &proc_dostring,
.strategy = &sysctl_string,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "core_pipe_limit",
+ .data = &core_pipe_limit,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
@@ -1390,6 +1398,31 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &scan_unevictable_handler,
},
+#ifdef CONFIG_MEMORY_FAILURE
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "memory_failure_early_kill",
+ .data = &sysctl_memory_failure_early_kill,
+ .maxlen = sizeof(sysctl_memory_failure_early_kill),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "memory_failure_recovery",
+ .data = &sysctl_memory_failure_recovery,
+ .maxlen = sizeof(sysctl_memory_failure_recovery),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
+
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
@@ -2218,7 +2251,7 @@ void sysctl_head_put(struct ctl_table_header *head)
#ifdef CONFIG_PROC_SYSCTL
static int _proc_do_string(void* data, int maxlen, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
size_t len;
@@ -2279,7 +2312,6 @@ static int _proc_do_string(void* data, int maxlen, int write,
* proc_dostring - read a string sysctl
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
@@ -2293,10 +2325,10 @@ static int _proc_do_string(void* data, int maxlen, int write,
*
* Returns 0 on success.
*/
-int proc_dostring(struct ctl_table *table, int write, struct file *filp,
+int proc_dostring(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return _proc_do_string(table->data, table->maxlen, write, filp,
+ return _proc_do_string(table->data, table->maxlen, write,
buffer, lenp, ppos);
}
@@ -2321,7 +2353,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
}
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
- int write, struct file *filp, void __user *buffer,
+ int write, void __user *buffer,
size_t *lenp, loff_t *ppos,
int (*conv)(int *negp, unsigned long *lvalp, int *valp,
int write, void *data),
@@ -2428,13 +2460,13 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
#undef TMPBUFLEN
}
-static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+static int do_proc_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos,
int (*conv)(int *negp, unsigned long *lvalp, int *valp,
int write, void *data),
void *data)
{
- return __do_proc_dointvec(table->data, table, write, filp,
+ return __do_proc_dointvec(table->data, table, write,
buffer, lenp, ppos, conv, data);
}
@@ -2442,7 +2474,6 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil
* proc_dointvec - read a vector of integers
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
@@ -2452,10 +2483,10 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil
*
* Returns 0 on success.
*/
-int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+ return do_proc_dointvec(table,write,buffer,lenp,ppos,
NULL,NULL);
}
@@ -2463,7 +2494,7 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
* Taint values can only be increased
* This means we can safely use a temporary.
*/
-static int proc_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
@@ -2475,7 +2506,7 @@ static int proc_taint(struct ctl_table *table, int write, struct file *filp,
t = *table;
t.data = &tmptaint;
- err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
+ err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
if (err < 0)
return err;
@@ -2527,7 +2558,6 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
* proc_dointvec_minmax - read a vector of integers with min/max values
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
@@ -2540,19 +2570,18 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
*
* Returns 0 on success.
*/
-int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct do_proc_dointvec_minmax_conv_param param = {
.min = (int *) table->extra1,
.max = (int *) table->extra2,
};
- return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
+ return do_proc_dointvec(table, write, buffer, lenp, ppos,
do_proc_dointvec_minmax_conv, &param);
}
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos,
unsigned long convmul,
@@ -2657,21 +2686,19 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
}
static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos,
unsigned long convmul,
unsigned long convdiv)
{
return __do_proc_doulongvec_minmax(table->data, table, write,
- filp, buffer, lenp, ppos, convmul, convdiv);
+ buffer, lenp, ppos, convmul, convdiv);
}
/**
* proc_doulongvec_minmax - read a vector of long integers with min/max values
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
@@ -2684,17 +2711,16 @@ static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
*
* Returns 0 on success.
*/
-int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
+ return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
}
/**
* proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
@@ -2709,11 +2735,10 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp
* Returns 0 on success.
*/
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
- return do_proc_doulongvec_minmax(table, write, filp, buffer,
+ return do_proc_doulongvec_minmax(table, write, buffer,
lenp, ppos, HZ, 1000l);
}
@@ -2789,7 +2814,6 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
* proc_dointvec_jiffies - read a vector of integers as seconds
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
@@ -2801,10 +2825,10 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
*
* Returns 0 on success.
*/
-int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+ return do_proc_dointvec(table,write,buffer,lenp,ppos,
do_proc_dointvec_jiffies_conv,NULL);
}
@@ -2812,7 +2836,6 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
* proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: pointer to the file position
@@ -2824,10 +2847,10 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
*
* Returns 0 on success.
*/
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+ return do_proc_dointvec(table,write,buffer,lenp,ppos,
do_proc_dointvec_userhz_jiffies_conv,NULL);
}
@@ -2835,7 +2858,6 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file
* proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
@@ -2848,14 +2870,14 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file
*
* Returns 0 on success.
*/
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
+ return do_proc_dointvec(table, write, buffer, lenp, ppos,
do_proc_dointvec_ms_jiffies_conv, NULL);
}
-static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
+static int proc_do_cad_pid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct pid *new_pid;
@@ -2864,7 +2886,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
tmp = pid_vnr(cad_pid);
- r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
+ r = __do_proc_dointvec(&tmp, table, write, buffer,
lenp, ppos, NULL, NULL);
if (r || !write)
return r;
@@ -2879,50 +2901,49 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
#else /* CONFIG_PROC_FS */
-int proc_dostring(struct ctl_table *table, int write, struct file *filp,
+int proc_dostring(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 0b0a6366c9d..ee266620b06 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,4 +1,4 @@
-obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
+obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c
new file mode 100644
index 00000000000..86628e755f3
--- /dev/null
+++ b/kernel/time/timeconv.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
+ * This file is part of the GNU C Library.
+ * Contributed by Paul Eggert (eggert@twinsun.com).
+ *
+ * The GNU C Library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * The GNU C Library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with the GNU C Library; see the file COPYING.LIB. If not,
+ * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Converts the calendar time to broken-down time representation
+ * Based on code from glibc-2.6
+ *
+ * 2009-7-14:
+ * Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com>
+ */
+
+#include <linux/time.h>
+#include <linux/module.h>
+
+/*
+ * Nonzero if YEAR is a leap year (every 4 years,
+ * except every 100th isn't, and every 400th is).
+ */
+static int __isleap(long year)
+{
+ return (year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0);
+}
+
+/* Floor division for longs: rounds toward negative infinity (valid for b > 0) */
+static long math_div(long a, long b)
+{
+ return a / b - (a % b < 0);
+}
+
+/* Leap years in [y1, y2); negated count if y1 > y2 (time_to_tm relies on this) */
+static long leaps_between(long y1, long y2)
+{
+ long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100)
+ + math_div(y1 - 1, 400);
+ long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100)
+ + math_div(y2 - 1, 400);
+ return leaps2 - leaps1;
+}
+
+/* How many days come before each month (0-12). */
+static const unsigned short __mon_yday[2][13] = {
+ /* Normal years. */
+ {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
+ /* Leap years. */
+ {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
+};
+
+#define SECS_PER_HOUR (60 * 60)
+#define SECS_PER_DAY (SECS_PER_HOUR * 24)
+
+/**
+ * time_to_tm - converts the calendar time to local broken-down time
+ *
+ * @totalsecs: the number of seconds elapsed since 00:00:00 on January 1, 1970,
+ * Coordinated Universal Time (UTC).
+ * @offset: offset in seconds added to totalsecs.
+ * @result: pointer to struct tm variable to receive broken-down time
+ */
+void time_to_tm(time_t totalsecs, int offset, struct tm *result)
+{
+ long days, rem, y;
+ const unsigned short *ip;
+
+ days = totalsecs / SECS_PER_DAY;
+ rem = totalsecs % SECS_PER_DAY;
+ rem += offset;
+ while (rem < 0) {
+ rem += SECS_PER_DAY;
+ --days;
+ }
+ while (rem >= SECS_PER_DAY) {
+ rem -= SECS_PER_DAY;
+ ++days;
+ }
+
+ result->tm_hour = rem / SECS_PER_HOUR;
+ rem %= SECS_PER_HOUR;
+ result->tm_min = rem / 60;
+ result->tm_sec = rem % 60;
+
+ /* January 1, 1970 was a Thursday. */
+ result->tm_wday = (4 + days) % 7;
+ if (result->tm_wday < 0)
+ result->tm_wday += 7;
+
+ y = 1970;
+
+ while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
+ /* Guess a corrected year, assuming 365 days per year. */
+ long yg = y + math_div(days, 365);
+
+ /* Adjust DAYS and Y to match the guessed year. */
+ days -= (yg - y) * 365 + leaps_between(y, yg);
+ y = yg;
+ }
+
+ result->tm_year = y - 1900;
+
+ result->tm_yday = days;
+
+ ip = __mon_yday[__isleap(y)];
+ for (y = 11; days < ip[y]; y--)
+ continue;
+ days -= ip[y];
+
+ result->tm_mon = y;
+ result->tm_mday = days + 1;
+}
+EXPORT_SYMBOL(time_to_tm);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 23df7771c93..a142579765b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3015,7 +3015,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
int
ftrace_enable_sysctl(struct ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
@@ -3025,7 +3025,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
mutex_lock(&ftrace_lock);
- ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
goto out;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6c0f6a8a22e..411af37f4be 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1984,11 +1984,9 @@ __tracing_open(struct inode *inode, struct file *file)
if (current_trace)
*iter->trace = *current_trace;
- if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
goto fail;
- cpumask_clear(iter->started);
-
if (current_trace && current_trace->print_max)
iter->tr = &max_tr;
else
@@ -4389,7 +4387,7 @@ __init static int tracer_alloc_buffers(void)
if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
goto out_free_buffer_mask;
- if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
goto out_free_tracing_cpumask;
/* To save memory, keep the ring buffer size to its minimum */
@@ -4400,7 +4398,6 @@ __init static int tracer_alloc_buffers(void)
cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
cpumask_copy(tracing_cpumask, cpu_all_mask);
- cpumask_clear(tracing_reader_cpumask);
/* TODO: make the number of buffers hot pluggable with CPUS */
global_trace.buffer = ring_buffer_alloc(ring_buf_size,
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 0f6facb050a..8504ac71e4e 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -296,14 +296,14 @@ static const struct file_operations stack_trace_fops = {
int
stack_trace_sysctl(struct ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
mutex_lock(&stack_sysctl_mutex);
- ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret || !write ||
(last_stack_tracer_enabled == !!stack_tracer_enabled))
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 0314501688b..419209893d8 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -4,7 +4,6 @@
*/
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 92359cc747a..69eae358a72 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -42,14 +42,14 @@ static void put_uts(ctl_table *table, int write, void *which)
* Special case of dostring for the UTS structure. This has locks
* to observe. Should this be in kernel/sys.c ????
*/
-static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+static int proc_do_uts_string(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table uts_table;
int r;
memcpy(&uts_table, table, sizeof(uts_table));
uts_table.data = get_uts(table, write);
- r = proc_dostring(&uts_table,write,filp,buffer,lenp, ppos);
+ r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
put_uts(table, write, uts_table.data);
return r;
}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d57b12f59c8..891155817bc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -50,6 +50,14 @@ config MAGIC_SYSRQ
keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
unless you really know what this hack does.
+config STRIP_ASM_SYMS
+ bool "Strip assembler-generated symbols during link"
+ default n
+ help
+ Strip internal assembler-generated symbols during a link (symbols
+ that look like '.Lxxx') so they don't pollute the output of
+ get_wchan() and suchlike.
+
config UNUSED_SYMBOLS
bool "Enable unused/obsolete exported symbols"
default y if X86
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index 68dfce59c1b..fc686c7a0a0 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -27,6 +27,11 @@
#define GZIP_IOBUF_SIZE (16*1024)
+static int nofill(void *buffer, unsigned int len)
+{
+ return -1;
+}
+
/* Included from initramfs et al code */
STATIC int INIT gunzip(unsigned char *buf, int len,
int(*fill)(void*, unsigned int),
@@ -76,6 +81,9 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
goto gunzip_nomem4;
}
+ if (!fill)
+ fill = nofill;
+
if (len == 0)
len = fill(zbuf, GZIP_IOBUF_SIZE);
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 0b954e04bd3..ca82fde81c8 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -82,6 +82,11 @@ struct rc {
#define RC_MODEL_TOTAL_BITS 11
+static int nofill(void *buffer, unsigned int len)
+{
+ return -1;
+}
+
/* Called twice: once at startup and once in rc_normalize() */
static void INIT rc_read(struct rc *rc)
{
@@ -97,7 +102,10 @@ static inline void INIT rc_init(struct rc *rc,
int (*fill)(void*, unsigned int),
char *buffer, int buffer_size)
{
- rc->fill = fill;
+ if (fill)
+ rc->fill = fill;
+ else
+ rc->fill = nofill;
rc->buffer = (uint8_t *)buffer;
rc->buffer_size = buffer_size;
rc->buffer_end = rc->buffer + rc->buffer_size;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 73a14b8c6d1..b91839e9e89 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -671,7 +671,7 @@ static char *ip4_string(char *p, const u8 *addr, bool leading_zeros)
return p;
}
-static char *ip6_compressed_string(char *p, const struct in6_addr *addr)
+static char *ip6_compressed_string(char *p, const char *addr)
{
int i;
int j;
@@ -683,7 +683,12 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr)
u8 hi;
u8 lo;
bool needcolon = false;
- bool useIPv4 = ipv6_addr_v4mapped(addr) || ipv6_addr_is_isatap(addr);
+ bool useIPv4;
+ struct in6_addr in6;
+
+ memcpy(&in6, addr, sizeof(struct in6_addr));
+
+ useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6);
memset(zerolength, 0, sizeof(zerolength));
@@ -695,7 +700,7 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr)
/* find position of longest 0 run */
for (i = 0; i < range; i++) {
for (j = i; j < range; j++) {
- if (addr->s6_addr16[j] != 0)
+ if (in6.s6_addr16[j] != 0)
break;
zerolength[i]++;
}
@@ -722,7 +727,7 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr)
needcolon = false;
}
/* hex u16 without leading 0s */
- word = ntohs(addr->s6_addr16[i]);
+ word = ntohs(in6.s6_addr16[i]);
hi = word >> 8;
lo = word & 0xff;
if (hi) {
@@ -741,19 +746,19 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr)
if (useIPv4) {
if (needcolon)
*p++ = ':';
- p = ip4_string(p, &addr->s6_addr[12], false);
+ p = ip4_string(p, &in6.s6_addr[12], false);
}
*p = '\0';
return p;
}
-static char *ip6_string(char *p, const struct in6_addr *addr, const char *fmt)
+static char *ip6_string(char *p, const char *addr, const char *fmt)
{
int i;
for (i = 0; i < 8; i++) {
- p = pack_hex_byte(p, addr->s6_addr[2 * i]);
- p = pack_hex_byte(p, addr->s6_addr[2 * i + 1]);
+ p = pack_hex_byte(p, *addr++);
+ p = pack_hex_byte(p, *addr++);
if (fmt[0] == 'I' && i != 7)
*p++ = ':';
}
@@ -768,9 +773,9 @@ static char *ip6_addr_string(char *buf, char *end, const u8 *addr,
char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")];
if (fmt[0] == 'I' && fmt[2] == 'c')
- ip6_compressed_string(ip6_addr, (const struct in6_addr *)addr);
+ ip6_compressed_string(ip6_addr, addr);
else
- ip6_string(ip6_addr, (const struct in6_addr *)addr, fmt);
+ ip6_string(ip6_addr, addr, fmt);
return string(buf, end, ip6_addr, spec);
}
diff --git a/mm/Kconfig b/mm/Kconfig
index 71eb0b4cce8..24776072959 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -245,6 +245,20 @@ config DEFAULT_MMAP_MIN_ADDR
/proc/sys/vm/mmap_min_addr tunable.
+config MEMORY_FAILURE
+ depends on MMU
+ depends on X86_MCE
+ bool "Enable recovery from hardware memory errors"
+ help
+ Enables code to recover from some memory failures on systems
+ with MCA recovery. This allows a system to continue running
+ even when some of its memory has uncorrected errors. This requires
+ special hardware support and typically ECC memory.
+
+config HWPOISON_INJECT
+ tristate "Poison pages injector"
+ depends on MEMORY_FAILURE && DEBUG_KERNEL
+
config NOMMU_INITIAL_TRIM_EXCESS
int "Turn on mmap() excess space trimming before booting"
depends on !MMU
diff --git a/mm/Makefile b/mm/Makefile
index 88193d73cd1..515fd793c17 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -41,5 +41,7 @@ obj-$(CONFIG_SMP) += allocpercpu.o
endif
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
+obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
diff --git a/mm/filemap.c b/mm/filemap.c
index bcc7372aebb..6c84e598b4a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -58,7 +58,7 @@
/*
* Lock ordering:
*
- * ->i_mmap_lock (vmtruncate)
+ * ->i_mmap_lock (truncate_pagecache)
* ->private_lock (__free_pte->__set_page_dirty_buffers)
* ->swap_lock (exclusive_swap_page, others)
* ->mapping->tree_lock
@@ -104,6 +104,10 @@
*
* ->task->proc_lock
* ->dcache_lock (proc_pid_lookup)
+ *
+ * (code doesn't rely on that order, so you could switch it around)
+ * ->tasklist_lock (memory_failure, collect_procs_ao)
+ * ->i_mmap_lock
*/
/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 815dbd4a6dc..6f048fcc749 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1537,7 +1537,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
#ifdef CONFIG_SYSCTL
int hugetlb_sysctl_handler(struct ctl_table *table, int write,
- struct file *file, void __user *buffer,
+ void __user *buffer,
size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
@@ -1548,7 +1548,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
table->data = &tmp;
table->maxlen = sizeof(unsigned long);
- proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+ proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (write)
h->max_huge_pages = set_max_huge_pages(h, tmp);
@@ -1557,10 +1557,10 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
}
int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
- struct file *file, void __user *buffer,
+ void __user *buffer,
size_t *length, loff_t *ppos)
{
- proc_dointvec(table, write, file, buffer, length, ppos);
+ proc_dointvec(table, write, buffer, length, ppos);
if (hugepages_treat_as_movable)
htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
else
@@ -1569,7 +1569,7 @@ int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
}
int hugetlb_overcommit_handler(struct ctl_table *table, int write,
- struct file *file, void __user *buffer,
+ void __user *buffer,
size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
@@ -1580,7 +1580,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
table->data = &tmp;
table->maxlen = sizeof(unsigned long);
- proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+ proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (write) {
spin_lock(&hugetlb_lock);
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
new file mode 100644
index 00000000000..e1d85137f08
--- /dev/null
+++ b/mm/hwpoison-inject.c
@@ -0,0 +1,41 @@
+/* Inject a hwpoison memory failure on a arbitary pfn */
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+static struct dentry *hwpoison_dir, *corrupt_pfn;
+
+static int hwpoison_inject(void *data, u64 val)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ printk(KERN_INFO "Injecting memory failure at pfn %Lx\n", val);
+ return __memory_failure(val, 18, 0);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
+
+static void pfn_inject_exit(void)
+{
+ if (hwpoison_dir)
+ debugfs_remove_recursive(hwpoison_dir);
+}
+
+static int pfn_inject_init(void)
+{
+ hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
+ if (hwpoison_dir == NULL)
+ return -ENOMEM;
+ corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
+ NULL, &hwpoison_fops);
+ if (corrupt_pfn == NULL) {
+ pfn_inject_exit();
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+module_init(pfn_inject_init);
+module_exit(pfn_inject_exit);
+MODULE_LICENSE("GPL");
diff --git a/mm/ksm.c b/mm/ksm.c
index 37cc3732509..f7edac356f4 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -30,6 +30,7 @@
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
#include <linux/ksm.h>
#include <asm/tlbflush.h>
@@ -162,10 +163,10 @@ static unsigned long ksm_pages_unshared;
static unsigned long ksm_rmap_items;
/* Limit on the number of unswappable pages used */
-static unsigned long ksm_max_kernel_pages = 2000;
+static unsigned long ksm_max_kernel_pages;
/* Number of pages ksmd should scan in one batch */
-static unsigned int ksm_thread_pages_to_scan = 200;
+static unsigned int ksm_thread_pages_to_scan = 100;
/* Milliseconds ksmd should sleep between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;
@@ -173,7 +174,7 @@ static unsigned int ksm_thread_sleep_millisecs = 20;
#define KSM_RUN_STOP 0
#define KSM_RUN_MERGE 1
#define KSM_RUN_UNMERGE 2
-static unsigned int ksm_run = KSM_RUN_MERGE;
+static unsigned int ksm_run = KSM_RUN_STOP;
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
static DEFINE_MUTEX(ksm_thread_mutex);
@@ -183,6 +184,11 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock);
sizeof(struct __struct), __alignof__(struct __struct),\
(__flags), NULL)
+static void __init ksm_init_max_kernel_pages(void)
+{
+ ksm_max_kernel_pages = nr_free_buffer_pages() / 4;
+}
+
static int __init ksm_slab_init(void)
{
rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
@@ -1667,6 +1673,8 @@ static int __init ksm_init(void)
struct task_struct *ksm_thread;
int err;
+ ksm_init_max_kernel_pages();
+
err = ksm_slab_init();
if (err)
goto out;
diff --git a/mm/madvise.c b/mm/madvise.c
index d9ae2067952..35b1479b7c9 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -218,6 +218,32 @@ static long madvise_remove(struct vm_area_struct *vma,
return error;
}
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Error injection support for memory error handling.
+ */
+static int madvise_hwpoison(unsigned long start, unsigned long end)
+{
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ for (; start < end; start += PAGE_SIZE) {
+ struct page *p;
+ int ret = get_user_pages(current, current->mm, start, 1,
+ 0, 0, &p, NULL);
+ if (ret != 1)
+ return ret;
+ printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
+ page_to_pfn(p), start);
+ /* Ignore return value for now */
+ __memory_failure(page_to_pfn(p), 0, 1);
+ put_page(p);
+ }
+ return ret;
+}
+#endif
+
static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
unsigned long start, unsigned long end, int behavior)
@@ -308,6 +334,10 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
int write;
size_t len;
+#ifdef CONFIG_MEMORY_FAILURE
+ if (behavior == MADV_HWPOISON)
+ return madvise_hwpoison(start, start+len_in);
+#endif
if (!madvise_behavior_valid(behavior))
return error;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9b10d875378..e2b98a6875c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -29,6 +29,7 @@
#include <linux/rcupdate.h>
#include <linux/limits.h>
#include <linux/mutex.h>
+#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/spinlock.h>
@@ -43,6 +44,7 @@
struct cgroup_subsys mem_cgroup_subsys __read_mostly;
#define MEM_CGROUP_RECLAIM_RETRIES 5
+struct mem_cgroup *root_mem_cgroup __read_mostly;
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
@@ -53,6 +55,7 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/
#endif
static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */
+#define SOFTLIMIT_EVENTS_THRESH (1000)
/*
* Statistics for memory cgroup.
@@ -66,6 +69,8 @@ enum mem_cgroup_stat_index {
MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */
MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */
MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */
+ MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */
+ MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
MEM_CGROUP_STAT_NSTATS,
};
@@ -78,6 +83,20 @@ struct mem_cgroup_stat {
struct mem_cgroup_stat_cpu cpustat[0];
};
+static inline void
+__mem_cgroup_stat_reset_safe(struct mem_cgroup_stat_cpu *stat,
+ enum mem_cgroup_stat_index idx)
+{
+ stat->count[idx] = 0;
+}
+
+static inline s64
+__mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat,
+ enum mem_cgroup_stat_index idx)
+{
+ return stat->count[idx];
+}
+
/*
* For accounting under irq disable, no need for increment preempt count.
*/
@@ -117,6 +136,12 @@ struct mem_cgroup_per_zone {
unsigned long count[NR_LRU_LISTS];
struct zone_reclaim_stat reclaim_stat;
+ struct rb_node tree_node; /* RB tree node */
+ unsigned long long usage_in_excess;/* Set to the value by which */
+ /* the soft limit is exceeded*/
+ bool on_tree;
+ struct mem_cgroup *mem; /* Back pointer, we cannot */
+ /* use container_of */
};
/* Macro for accessing counter */
#define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)])
@@ -130,6 +155,26 @@ struct mem_cgroup_lru_info {
};
/*
+ * Cgroups above their limits are maintained in a RB-Tree, independent of
+ * their hierarchy representation
+ */
+
+struct mem_cgroup_tree_per_zone {
+ struct rb_root rb_root;
+ spinlock_t lock;
+};
+
+struct mem_cgroup_tree_per_node {
+ struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
+};
+
+struct mem_cgroup_tree {
+ struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
+};
+
+static struct mem_cgroup_tree soft_limit_tree __read_mostly;
+
+/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
* statistics based on the statistics developed by Rik Van Riel for clock-pro,
@@ -186,6 +231,13 @@ struct mem_cgroup {
struct mem_cgroup_stat stat;
};
+/*
+ * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
+ * limit reclaim to prevent infinite loops, if they ever occur.
+ */
+#define MEM_CGROUP_MAX_RECLAIM_LOOPS (100)
+#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2)
+
enum charge_type {
MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
MEM_CGROUP_CHARGE_TYPE_MAPPED,
@@ -200,13 +252,8 @@ enum charge_type {
#define PCGF_CACHE (1UL << PCG_CACHE)
#define PCGF_USED (1UL << PCG_USED)
#define PCGF_LOCK (1UL << PCG_LOCK)
-static const unsigned long
-pcg_default_flags[NR_CHARGE_TYPE] = {
- PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */
- PCGF_USED | PCGF_LOCK, /* Anon */
- PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
- 0, /* FORCE */
-};
+/* Not used, but added here for completeness */
+#define PCGF_ACCT (1UL << PCG_ACCT)
/* for encoding cft->private value on file */
#define _MEM (0)
@@ -215,15 +262,241 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val) ((val) & 0xffff)
+/*
+ * Reclaim flags for mem_cgroup_hierarchical_reclaim
+ */
+#define MEM_CGROUP_RECLAIM_NOSWAP_BIT 0x0
+#define MEM_CGROUP_RECLAIM_NOSWAP (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
+#define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1
+#define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
+#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2
+#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
+
static void mem_cgroup_get(struct mem_cgroup *mem);
static void mem_cgroup_put(struct mem_cgroup *mem);
static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
+static struct mem_cgroup_per_zone *
+mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
+{
+ return &mem->info.nodeinfo[nid]->zoneinfo[zid];
+}
+
+static struct mem_cgroup_per_zone *
+page_cgroup_zoneinfo(struct page_cgroup *pc)
+{
+ struct mem_cgroup *mem = pc->mem_cgroup;
+ int nid = page_cgroup_nid(pc);
+ int zid = page_cgroup_zid(pc);
+
+ if (!mem)
+ return NULL;
+
+ return mem_cgroup_zoneinfo(mem, nid, zid);
+}
+
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_node_zone(int nid, int zid)
+{
+ return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_from_page(struct page *page)
+{
+ int nid = page_to_nid(page);
+ int zid = page_zonenum(page);
+
+ return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static void
+__mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
+ struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
+{
+ struct rb_node **p = &mctz->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct mem_cgroup_per_zone *mz_node;
+
+ if (mz->on_tree)
+ return;
+
+ mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res);
+ while (*p) {
+ parent = *p;
+ mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
+ tree_node);
+ if (mz->usage_in_excess < mz_node->usage_in_excess)
+ p = &(*p)->rb_left;
+ /*
+ * We can't avoid mem cgroups that are over their soft
+ * limit by the same amount
+ */
+ else if (mz->usage_in_excess >= mz_node->usage_in_excess)
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&mz->tree_node, parent, p);
+ rb_insert_color(&mz->tree_node, &mctz->rb_root);
+ mz->on_tree = true;
+}
+
+static void
+__mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+ struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
+{
+ if (!mz->on_tree)
+ return;
+ rb_erase(&mz->tree_node, &mctz->rb_root);
+ mz->on_tree = false;
+}
+
+static void
+mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
+ struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
+{
+ spin_lock(&mctz->lock);
+ __mem_cgroup_insert_exceeded(mem, mz, mctz);
+ spin_unlock(&mctz->lock);
+}
+
+static void
+mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+ struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
+{
+ spin_lock(&mctz->lock);
+ __mem_cgroup_remove_exceeded(mem, mz, mctz);
+ spin_unlock(&mctz->lock);
+}
+
+static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
+{
+ bool ret = false;
+ int cpu;
+ s64 val;
+ struct mem_cgroup_stat_cpu *cpustat;
+
+ cpu = get_cpu();
+ cpustat = &mem->stat.cpustat[cpu];
+ val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_EVENTS);
+ if (unlikely(val > SOFTLIMIT_EVENTS_THRESH)) {
+ __mem_cgroup_stat_reset_safe(cpustat, MEM_CGROUP_STAT_EVENTS);
+ ret = true;
+ }
+ put_cpu();
+ return ret;
+}
+
+static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
+{
+ unsigned long long prev_usage_in_excess, new_usage_in_excess;
+ bool updated_tree = false;
+ struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_tree_per_zone *mctz;
+
+ mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page));
+ mctz = soft_limit_tree_from_page(page);
+
+ /*
+ * We do updates in lazy mode, mem's are removed
+ * lazily from the per-zone, per-node rb tree
+ */
+ prev_usage_in_excess = mz->usage_in_excess;
+
+ new_usage_in_excess = res_counter_soft_limit_excess(&mem->res);
+ if (prev_usage_in_excess) {
+ mem_cgroup_remove_exceeded(mem, mz, mctz);
+ updated_tree = true;
+ }
+ if (!new_usage_in_excess)
+ goto done;
+ mem_cgroup_insert_exceeded(mem, mz, mctz);
+
+done:
+ if (updated_tree) {
+ spin_lock(&mctz->lock);
+ mz->usage_in_excess = new_usage_in_excess;
+ spin_unlock(&mctz->lock);
+ }
+}
+
+static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
+{
+ int node, zone;
+ struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_tree_per_zone *mctz;
+
+ for_each_node_state(node, N_POSSIBLE) {
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+ mz = mem_cgroup_zoneinfo(mem, node, zone);
+ mctz = soft_limit_tree_node_zone(node, zone);
+ mem_cgroup_remove_exceeded(mem, mz, mctz);
+ }
+ }
+}
+
+static inline unsigned long mem_cgroup_get_excess(struct mem_cgroup *mem)
+{
+ return res_counter_soft_limit_excess(&mem->res) >> PAGE_SHIFT;
+}
+
+static struct mem_cgroup_per_zone *
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+ struct rb_node *rightmost = NULL;
+ struct mem_cgroup_per_zone *mz = NULL;
+
+retry:
+ rightmost = rb_last(&mctz->rb_root);
+ if (!rightmost)
+ goto done; /* Nothing to reclaim from */
+
+ mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
+ /*
+ * Remove the node now but someone else can add it back,
+ * we will to add it back at the end of reclaim to its correct
+ * position in the tree.
+ */
+ __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
+ if (!res_counter_soft_limit_excess(&mz->mem->res) ||
+ !css_tryget(&mz->mem->css))
+ goto retry;
+done:
+ return mz;
+}
+
+static struct mem_cgroup_per_zone *
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+ struct mem_cgroup_per_zone *mz;
+
+ spin_lock(&mctz->lock);
+ mz = __mem_cgroup_largest_soft_limit_node(mctz);
+ spin_unlock(&mctz->lock);
+ return mz;
+}
+
+static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+ bool charge)
+{
+ int val = (charge) ? 1 : -1;
+ struct mem_cgroup_stat *stat = &mem->stat;
+ struct mem_cgroup_stat_cpu *cpustat;
+ int cpu = get_cpu();
+
+ cpustat = &stat->cpustat[cpu];
+ __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
+ put_cpu();
+}
+
static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
struct page_cgroup *pc,
bool charge)
{
- int val = (charge)? 1 : -1;
+ int val = (charge) ? 1 : -1;
struct mem_cgroup_stat *stat = &mem->stat;
struct mem_cgroup_stat_cpu *cpustat;
int cpu = get_cpu();
@@ -240,28 +513,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
else
__mem_cgroup_stat_add_safe(cpustat,
MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
+ __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_EVENTS, 1);
put_cpu();
}
-static struct mem_cgroup_per_zone *
-mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
-{
- return &mem->info.nodeinfo[nid]->zoneinfo[zid];
-}
-
-static struct mem_cgroup_per_zone *
-page_cgroup_zoneinfo(struct page_cgroup *pc)
-{
- struct mem_cgroup *mem = pc->mem_cgroup;
- int nid = page_cgroup_nid(pc);
- int zid = page_cgroup_zid(pc);
-
- if (!mem)
- return NULL;
-
- return mem_cgroup_zoneinfo(mem, nid, zid);
-}
-
static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
enum lru_list idx)
{
@@ -354,6 +609,11 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
return ret;
}
+static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
+{
+ return (mem == root_mem_cgroup);
+}
+
/*
* Following LRU functions are allowed to be used without PCG_LOCK.
* Operations are called by routine of global LRU independently from memcg.
@@ -371,22 +631,24 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
{
struct page_cgroup *pc;
- struct mem_cgroup *mem;
struct mem_cgroup_per_zone *mz;
if (mem_cgroup_disabled())
return;
pc = lookup_page_cgroup(page);
/* can happen while we handle swapcache. */
- if (list_empty(&pc->lru) || !pc->mem_cgroup)
+ if (!TestClearPageCgroupAcctLRU(pc))
return;
+ VM_BUG_ON(!pc->mem_cgroup);
/*
* We don't check PCG_USED bit. It's cleared when the "page" is finally
* removed from global LRU.
*/
mz = page_cgroup_zoneinfo(pc);
- mem = pc->mem_cgroup;
MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+ if (mem_cgroup_is_root(pc->mem_cgroup))
+ return;
+ VM_BUG_ON(list_empty(&pc->lru));
list_del_init(&pc->lru);
return;
}
@@ -410,8 +672,8 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
* For making pc->mem_cgroup visible, insert smp_rmb() here.
*/
smp_rmb();
- /* unused page is not rotated. */
- if (!PageCgroupUsed(pc))
+ /* unused or root page is not rotated. */
+ if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
return;
mz = page_cgroup_zoneinfo(pc);
list_move(&pc->lru, &mz->lists[lru]);
@@ -425,6 +687,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
if (mem_cgroup_disabled())
return;
pc = lookup_page_cgroup(page);
+ VM_BUG_ON(PageCgroupAcctLRU(pc));
/*
* Used bit is set without atomic ops but after smp_wmb().
* For making pc->mem_cgroup visible, insert smp_rmb() here.
@@ -435,6 +698,9 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
mz = page_cgroup_zoneinfo(pc);
MEM_CGROUP_ZSTAT(mz, lru) += 1;
+ SetPageCgroupAcctLRU(pc);
+ if (mem_cgroup_is_root(pc->mem_cgroup))
+ return;
list_add(&pc->lru, &mz->lists[lru]);
}
@@ -469,7 +735,7 @@ static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
spin_lock_irqsave(&zone->lru_lock, flags);
/* link when the page is linked to LRU but page_cgroup isn't */
- if (PageLRU(page) && list_empty(&pc->lru))
+ if (PageLRU(page) && !PageCgroupAcctLRU(pc))
mem_cgroup_add_lru_list(page, page_lru(page));
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
@@ -855,28 +1121,62 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
* If shrink==true, for avoiding to free too much, this returns immedieately.
*/
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
- gfp_t gfp_mask, bool noswap, bool shrink)
+ struct zone *zone,
+ gfp_t gfp_mask,
+ unsigned long reclaim_options)
{
struct mem_cgroup *victim;
int ret, total = 0;
int loop = 0;
+ bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
+ bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
+ bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
+ unsigned long excess = mem_cgroup_get_excess(root_mem);
/* If memsw_is_minimum==1, swap-out is of-no-use. */
if (root_mem->memsw_is_minimum)
noswap = true;
- while (loop < 2) {
+ while (1) {
victim = mem_cgroup_select_victim(root_mem);
- if (victim == root_mem)
+ if (victim == root_mem) {
loop++;
+ if (loop >= 2) {
+ /*
+ * If we have not been able to reclaim
+ * anything, it might because there are
+ * no reclaimable pages under this hierarchy
+ */
+ if (!check_soft || !total) {
+ css_put(&victim->css);
+ break;
+ }
+ /*
+ * We want to do more targetted reclaim.
+ * excess >> 2 is not to excessive so as to
+ * reclaim too much, nor too less that we keep
+ * coming back to reclaim from this cgroup
+ */
+ if (total >= (excess >> 2) ||
+ (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) {
+ css_put(&victim->css);
+ break;
+ }
+ }
+ }
if (!mem_cgroup_local_usage(&victim->stat)) {
/* this cgroup's local usage == 0 */
css_put(&victim->css);
continue;
}
/* we use swappiness of local cgroup */
- ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap,
- get_swappiness(victim));
+ if (check_soft)
+ ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
+ noswap, get_swappiness(victim), zone,
+ zone->zone_pgdat->node_id);
+ else
+ ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
+ noswap, get_swappiness(victim));
css_put(&victim->css);
/*
* At shrinking usage, we can't check we should stop here or
@@ -886,7 +1186,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
if (shrink)
return ret;
total += ret;
- if (mem_cgroup_check_under_limit(root_mem))
+ if (check_soft) {
+ if (res_counter_check_under_soft_limit(&root_mem->res))
+ return total;
+ } else if (mem_cgroup_check_under_limit(root_mem))
return 1 + total;
}
return total;
@@ -965,11 +1268,11 @@ done:
*/
static int __mem_cgroup_try_charge(struct mm_struct *mm,
gfp_t gfp_mask, struct mem_cgroup **memcg,
- bool oom)
+ bool oom, struct page *page)
{
- struct mem_cgroup *mem, *mem_over_limit;
+ struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit;
int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
- struct res_counter *fail_res;
+ struct res_counter *fail_res, *soft_fail_res = NULL;
if (unlikely(test_thread_flag(TIF_MEMDIE))) {
/* Don't account this! */
@@ -996,20 +1299,23 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
VM_BUG_ON(css_is_removed(&mem->css));
while (1) {
- int ret;
- bool noswap = false;
+ int ret = 0;
+ unsigned long flags = 0;
- ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
+ if (mem_cgroup_is_root(mem))
+ goto done;
+ ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
+ &soft_fail_res);
if (likely(!ret)) {
if (!do_swap_account)
break;
ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
- &fail_res);
+ &fail_res, NULL);
if (likely(!ret))
break;
/* mem+swap counter fails */
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- noswap = true;
+ res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+ flags |= MEM_CGROUP_RECLAIM_NOSWAP;
mem_over_limit = mem_cgroup_from_res_counter(fail_res,
memsw);
} else
@@ -1020,8 +1326,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
if (!(gfp_mask & __GFP_WAIT))
goto nomem;
- ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
- noswap, false);
+ ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
+ gfp_mask, flags);
if (ret)
continue;
@@ -1046,13 +1352,24 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
goto nomem;
}
}
+ /*
+ * Insert just the ancestor, we should trickle down to the correct
+ * cgroup for reclaim, since the other nodes will be below their
+ * soft limit
+ */
+ if (soft_fail_res) {
+ mem_over_soft_limit =
+ mem_cgroup_from_res_counter(soft_fail_res, res);
+ if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
+ mem_cgroup_update_tree(mem_over_soft_limit, page);
+ }
+done:
return 0;
nomem:
css_put(&mem->css);
return -ENOMEM;
}
-
/*
* A helper function to get mem_cgroup from ID. must be called under
* rcu_read_lock(). The caller must check css_is_removed() or some if
@@ -1119,15 +1436,38 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
lock_page_cgroup(pc);
if (unlikely(PageCgroupUsed(pc))) {
unlock_page_cgroup(pc);
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- if (do_swap_account)
- res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(mem)) {
+ res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+ if (do_swap_account)
+ res_counter_uncharge(&mem->memsw, PAGE_SIZE,
+ NULL);
+ }
css_put(&mem->css);
return;
}
+
pc->mem_cgroup = mem;
+ /*
+ * We access a page_cgroup asynchronously without lock_page_cgroup().
+ * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
+ * is accessed after testing USED bit. To make pc->mem_cgroup visible
+ * before USED bit, we need memory barrier here.
+ * See mem_cgroup_add_lru_list(), etc.
+ */
smp_wmb();
- pc->flags = pcg_default_flags[ctype];
+ switch (ctype) {
+ case MEM_CGROUP_CHARGE_TYPE_CACHE:
+ case MEM_CGROUP_CHARGE_TYPE_SHMEM:
+ SetPageCgroupCache(pc);
+ SetPageCgroupUsed(pc);
+ break;
+ case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+ ClearPageCgroupCache(pc);
+ SetPageCgroupUsed(pc);
+ break;
+ default:
+ break;
+ }
mem_cgroup_charge_statistics(mem, pc, true);
@@ -1178,7 +1518,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
if (pc->mem_cgroup != from)
goto out;
- res_counter_uncharge(&from->res, PAGE_SIZE);
+ if (!mem_cgroup_is_root(from))
+ res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
mem_cgroup_charge_statistics(from, pc, false);
page = pc->page;
@@ -1197,8 +1538,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
1);
}
- if (do_swap_account)
- res_counter_uncharge(&from->memsw, PAGE_SIZE);
+ if (do_swap_account && !mem_cgroup_is_root(from))
+ res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
css_put(&from->css);
css_get(&to->css);
@@ -1238,7 +1579,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
parent = mem_cgroup_from_cont(pcg);
- ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
+ ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page);
if (ret || !parent)
return ret;
@@ -1268,9 +1609,11 @@ uncharge:
/* drop extra refcnt by try_charge() */
css_put(&parent->css);
/* uncharge if move fails */
- res_counter_uncharge(&parent->res, PAGE_SIZE);
- if (do_swap_account)
- res_counter_uncharge(&parent->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(parent)) {
+ res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
+ if (do_swap_account)
+ res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+ }
return ret;
}
@@ -1295,7 +1638,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
prefetchw(pc);
mem = memcg;
- ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
+ ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page);
if (ret || !mem)
return ret;
@@ -1414,14 +1757,14 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
if (!mem)
goto charge_cur_mm;
*ptr = mem;
- ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
+ ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, page);
/* drop extra refcnt from tryget */
css_put(&mem->css);
return ret;
charge_cur_mm:
if (unlikely(!mm))
mm = &init_mm;
- return __mem_cgroup_try_charge(mm, mask, ptr, true);
+ return __mem_cgroup_try_charge(mm, mask, ptr, true, page);
}
static void
@@ -1459,7 +1802,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
* This recorded memcg can be obsolete one. So, avoid
* calling css_tryget
*/
- res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(memcg))
+ res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
+ NULL);
+ mem_cgroup_swap_statistics(memcg, false);
mem_cgroup_put(memcg);
}
rcu_read_unlock();
@@ -1484,9 +1830,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
return;
if (!mem)
return;
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- if (do_swap_account)
- res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(mem)) {
+ res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+ if (do_swap_account)
+ res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+ }
css_put(&mem->css);
}
@@ -1500,6 +1848,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
struct page_cgroup *pc;
struct mem_cgroup *mem = NULL;
struct mem_cgroup_per_zone *mz;
+ bool soft_limit_excess = false;
if (mem_cgroup_disabled())
return NULL;
@@ -1538,9 +1887,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
break;
}
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
- res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(mem)) {
+ res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
+ if (do_swap_account &&
+ (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
+ res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+ }
+ if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
+ mem_cgroup_swap_statistics(mem, true);
mem_cgroup_charge_statistics(mem, pc, false);
ClearPageCgroupUsed(pc);
@@ -1554,6 +1908,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
mz = page_cgroup_zoneinfo(pc);
unlock_page_cgroup(pc);
+ if (soft_limit_excess && mem_cgroup_soft_limit_check(mem))
+ mem_cgroup_update_tree(mem, page);
/* at swapout, this memcg will be accessed to record to swap */
if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
css_put(&mem->css);
@@ -1629,7 +1985,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
* We uncharge this because swap is freed.
* This memcg can be obsolete one. We avoid calling css_tryget
*/
- res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(memcg))
+ res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+ mem_cgroup_swap_statistics(memcg, false);
mem_cgroup_put(memcg);
}
rcu_read_unlock();
@@ -1658,7 +2016,8 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
unlock_page_cgroup(pc);
if (mem) {
- ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
+ ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false,
+ page);
css_put(&mem->css);
}
*ptr = mem;
@@ -1798,8 +2157,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
if (!ret)
break;
- progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
- false, true);
+ progress = mem_cgroup_hierarchical_reclaim(memcg, NULL,
+ GFP_KERNEL,
+ MEM_CGROUP_RECLAIM_SHRINK);
curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
@@ -1851,7 +2211,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
if (!ret)
break;
- mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true);
+ mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
+ MEM_CGROUP_RECLAIM_NOSWAP |
+ MEM_CGROUP_RECLAIM_SHRINK);
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
@@ -1862,6 +2224,97 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
return ret;
}
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask, int nid,
+ int zid)
+{
+ unsigned long nr_reclaimed = 0;
+ struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+ unsigned long reclaimed;
+ int loop = 0;
+ struct mem_cgroup_tree_per_zone *mctz;
+
+ if (order > 0)
+ return 0;
+
+ mctz = soft_limit_tree_node_zone(nid, zid);
+ /*
+ * This loop can run a while, especially if mem_cgroups continuously
+ * keep exceeding their soft limit and putting the system under
+ * pressure
+ */
+ do {
+ if (next_mz)
+ mz = next_mz;
+ else
+ mz = mem_cgroup_largest_soft_limit_node(mctz);
+ if (!mz)
+ break;
+
+ reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
+ gfp_mask,
+ MEM_CGROUP_RECLAIM_SOFT);
+ nr_reclaimed += reclaimed;
+ spin_lock(&mctz->lock);
+
+ /*
+ * If we failed to reclaim anything from this memory cgroup
+ * it is time to move on to the next cgroup
+ */
+ next_mz = NULL;
+ if (!reclaimed) {
+ do {
+ /*
+ * Loop until we find yet another one.
+ *
+ * By the time we get the soft_limit lock
+ * again, someone might have added the
+ * group back on the RB tree. Iterate to
+ * make sure we get a different mem.
+ * mem_cgroup_largest_soft_limit_node returns
+ * NULL if no other cgroup is present on
+ * the tree
+ */
+ next_mz =
+ __mem_cgroup_largest_soft_limit_node(mctz);
+ if (next_mz == mz) {
+ css_put(&next_mz->mem->css);
+ next_mz = NULL;
+ } else /* next_mz == NULL or other memcg */
+ break;
+ } while (1);
+ }
+ mz->usage_in_excess =
+ res_counter_soft_limit_excess(&mz->mem->res);
+ __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
+ /*
+ * One school of thought says that we should not add
+ * back the node to the tree if reclaim returns 0.
+ * But our reclaim could return 0, simply because due
+ * to priority we are exposing a smaller subset of
+ * memory to reclaim from. Consider this as a longer
+ * term TODO.
+ */
+ if (mz->usage_in_excess)
+ __mem_cgroup_insert_exceeded(mz->mem, mz, mctz);
+ spin_unlock(&mctz->lock);
+ css_put(&mz->mem->css);
+ loop++;
+ /*
+ * Could not reclaim anything and there are no more
+ * mem cgroups to try or we seem to be looping without
+ * reclaiming anything.
+ */
+ if (!nr_reclaimed &&
+ (next_mz == NULL ||
+ loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+ break;
+ } while (!nr_reclaimed);
+ if (next_mz)
+ css_put(&next_mz->mem->css);
+ return nr_reclaimed;
+}
+
/*
* This routine traverse page_cgroup in given list and drop them all.
* *And* this routine doesn't reclaim page itself, just removes page_cgroup.
@@ -2046,20 +2499,64 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
return retval;
}
+struct mem_cgroup_idx_data {
+ s64 val;
+ enum mem_cgroup_stat_index idx;
+};
+
+static int
+mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
+{
+ struct mem_cgroup_idx_data *d = data;
+ d->val += mem_cgroup_read_stat(&mem->stat, d->idx);
+ return 0;
+}
+
+static void
+mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
+ enum mem_cgroup_stat_index idx, s64 *val)
+{
+ struct mem_cgroup_idx_data d;
+ d.idx = idx;
+ d.val = 0;
+ mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat);
+ *val = d.val;
+}
+
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
{
struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
- u64 val = 0;
+ u64 idx_val, val;
int type, name;
type = MEMFILE_TYPE(cft->private);
name = MEMFILE_ATTR(cft->private);
switch (type) {
case _MEM:
- val = res_counter_read_u64(&mem->res, name);
+ if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+ mem_cgroup_get_recursive_idx_stat(mem,
+ MEM_CGROUP_STAT_CACHE, &idx_val);
+ val = idx_val;
+ mem_cgroup_get_recursive_idx_stat(mem,
+ MEM_CGROUP_STAT_RSS, &idx_val);
+ val += idx_val;
+ val <<= PAGE_SHIFT;
+ } else
+ val = res_counter_read_u64(&mem->res, name);
break;
case _MEMSWAP:
- val = res_counter_read_u64(&mem->memsw, name);
+ if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+ mem_cgroup_get_recursive_idx_stat(mem,
+ MEM_CGROUP_STAT_CACHE, &idx_val);
+ val = idx_val;
+ mem_cgroup_get_recursive_idx_stat(mem,
+ MEM_CGROUP_STAT_RSS, &idx_val);
+ val += idx_val;
+ mem_cgroup_get_recursive_idx_stat(mem,
+ MEM_CGROUP_STAT_SWAPOUT, &idx_val);
+ val <<= PAGE_SHIFT;
+ } else
+ val = res_counter_read_u64(&mem->memsw, name);
break;
default:
BUG();
@@ -2083,6 +2580,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
name = MEMFILE_ATTR(cft->private);
switch (name) {
case RES_LIMIT:
+ if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
+ ret = -EINVAL;
+ break;
+ }
/* This function does all necessary parse...reuse it */
ret = res_counter_memparse_write_strategy(buffer, &val);
if (ret)
@@ -2092,6 +2593,20 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
else
ret = mem_cgroup_resize_memsw_limit(memcg, val);
break;
+ case RES_SOFT_LIMIT:
+ ret = res_counter_memparse_write_strategy(buffer, &val);
+ if (ret)
+ break;
+ /*
+ * For memsw, soft limits are hard to implement in terms
+ * of semantics, for now, we support soft limits for
+ * control without swap
+ */
+ if (type == _MEM)
+ ret = res_counter_set_soft_limit(&memcg->res, val);
+ else
+ ret = -EINVAL;
+ break;
default:
ret = -EINVAL; /* should be BUG() ? */
break;
@@ -2149,6 +2664,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
res_counter_reset_failcnt(&mem->memsw);
break;
}
+
return 0;
}
@@ -2160,6 +2676,7 @@ enum {
MCS_MAPPED_FILE,
MCS_PGPGIN,
MCS_PGPGOUT,
+ MCS_SWAP,
MCS_INACTIVE_ANON,
MCS_ACTIVE_ANON,
MCS_INACTIVE_FILE,
@@ -2181,6 +2698,7 @@ struct {
{"mapped_file", "total_mapped_file"},
{"pgpgin", "total_pgpgin"},
{"pgpgout", "total_pgpgout"},
+ {"swap", "total_swap"},
{"inactive_anon", "total_inactive_anon"},
{"active_anon", "total_active_anon"},
{"inactive_file", "total_inactive_file"},
@@ -2205,6 +2723,10 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
s->stat[MCS_PGPGIN] += val;
val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
s->stat[MCS_PGPGOUT] += val;
+ if (do_swap_account) {
+ val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT);
+ s->stat[MCS_SWAP] += val * PAGE_SIZE;
+ }
/* per zone stat */
val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);
@@ -2236,8 +2758,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
memset(&mystat, 0, sizeof(mystat));
mem_cgroup_get_local_stat(mem_cont, &mystat);
- for (i = 0; i < NR_MCS_STAT; i++)
+ for (i = 0; i < NR_MCS_STAT; i++) {
+ if (i == MCS_SWAP && !do_swap_account)
+ continue;
cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]);
+ }
/* Hierarchical information */
{
@@ -2250,9 +2775,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
memset(&mystat, 0, sizeof(mystat));
mem_cgroup_get_total_stat(mem_cont, &mystat);
- for (i = 0; i < NR_MCS_STAT; i++)
+ for (i = 0; i < NR_MCS_STAT; i++) {
+ if (i == MCS_SWAP && !do_swap_account)
+ continue;
cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]);
-
+ }
#ifdef CONFIG_DEBUG_VM
cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
@@ -2345,6 +2872,12 @@ static struct cftype mem_cgroup_files[] = {
.read_u64 = mem_cgroup_read,
},
{
+ .name = "soft_limit_in_bytes",
+ .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
+ .write_string = mem_cgroup_write,
+ .read_u64 = mem_cgroup_read,
+ },
+ {
.name = "failcnt",
.private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
.trigger = mem_cgroup_reset,
@@ -2438,6 +2971,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
mz = &pn->zoneinfo[zone];
for_each_lru(l)
INIT_LIST_HEAD(&mz->lists[l]);
+ mz->usage_in_excess = 0;
+ mz->on_tree = false;
+ mz->mem = mem;
}
return 0;
}
@@ -2483,6 +3019,7 @@ static void __mem_cgroup_free(struct mem_cgroup *mem)
{
int node;
+ mem_cgroup_remove_from_trees(mem);
free_css_id(&mem_cgroup_subsys, &mem->css);
for_each_node_state(node, N_POSSIBLE)
@@ -2531,6 +3068,31 @@ static void __init enable_swap_cgroup(void)
}
#endif
+static int mem_cgroup_soft_limit_tree_init(void)
+{
+ struct mem_cgroup_tree_per_node *rtpn;
+ struct mem_cgroup_tree_per_zone *rtpz;
+ int tmp, node, zone;
+
+ for_each_node_state(node, N_POSSIBLE) {
+ tmp = node;
+ if (!node_state(node, N_NORMAL_MEMORY))
+ tmp = -1;
+ rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
+ if (!rtpn)
+ return 1;
+
+ soft_limit_tree.rb_tree_per_node[node] = rtpn;
+
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+ rtpz = &rtpn->rb_tree_per_zone[zone];
+ rtpz->rb_root = RB_ROOT;
+ spin_lock_init(&rtpz->lock);
+ }
+ }
+ return 0;
+}
+
static struct cgroup_subsys_state * __ref
mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
{
@@ -2545,10 +3107,15 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
for_each_node_state(node, N_POSSIBLE)
if (alloc_mem_cgroup_per_zone_info(mem, node))
goto free_out;
+
/* root ? */
if (cont->parent == NULL) {
enable_swap_cgroup();
parent = NULL;
+ root_mem_cgroup = mem;
+ if (mem_cgroup_soft_limit_tree_init())
+ goto free_out;
+
} else {
parent = mem_cgroup_from_cont(cont->parent);
mem->use_hierarchy = parent->use_hierarchy;
@@ -2577,6 +3144,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
return &mem->css;
free_out:
__mem_cgroup_free(mem);
+ root_mem_cgroup = NULL;
return ERR_PTR(error);
}
@@ -2612,7 +3180,8 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
static void mem_cgroup_move_task(struct cgroup_subsys *ss,
struct cgroup *cont,
struct cgroup *old_cont,
- struct task_struct *p)
+ struct task_struct *p,
+ bool threadgroup)
{
mutex_lock(&memcg_tasklist);
/*
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
new file mode 100644
index 00000000000..729d4b15b64
--- /dev/null
+++ b/mm/memory-failure.c
@@ -0,0 +1,832 @@
+/*
+ * Copyright (C) 2008, 2009 Intel Corporation
+ * Authors: Andi Kleen, Fengguang Wu
+ *
+ * This software may be redistributed and/or modified under the terms of
+ * the GNU General Public License ("GPL") version 2 only as published by the
+ * Free Software Foundation.
+ *
+ * High level machine check handler. Handles pages reported by the
+ * hardware as being corrupted usually due to a 2bit ECC memory or cache
+ * failure.
+ *
+ * Handles page cache pages in various states. The tricky part
+ * here is that we can access any page asynchronous to other VM
+ * users, because memory failures could happen anytime and anywhere,
+ * possibly violating some of their assumptions. This is why this code
+ * has to be extremely careful. Generally it tries to use normal locking
+ * rules, as in get the standard locks, even if that means the
+ * error handling takes potentially a long time.
+ *
+ * The operation to map back from RMAP chains to processes has to walk
+ * the complete process list and has non-linear complexity in the number of
+ * mappings. In short it can be quite slow. But since memory corruptions
+ * are rare we hope to get away with this.
+ */
+
+/*
+ * Notebook:
+ * - hugetlb needs more code
+ * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages
+ * - pass bad pages to kdump next kernel
+ */
+#define DEBUG 1 /* remove me in 2.6.34 */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/sched.h>
+#include <linux/rmap.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/backing-dev.h>
+#include "internal.h"
+
+int sysctl_memory_failure_early_kill __read_mostly = 0;
+
+int sysctl_memory_failure_recovery __read_mostly = 1;
+
+atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0);
+
+/*
+ * Send all the processes who have the page mapped an ``action optional''
+ * signal.
+ */
+static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
+ unsigned long pfn)
+{
+ struct siginfo si;
+ int ret;
+
+ printk(KERN_ERR
+ "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n",
+ pfn, t->comm, t->pid);
+ si.si_signo = SIGBUS;
+ si.si_errno = 0;
+ si.si_code = BUS_MCEERR_AO;
+ si.si_addr = (void *)addr;
+#ifdef __ARCH_SI_TRAPNO
+ si.si_trapno = trapno;
+#endif
+ si.si_addr_lsb = PAGE_SHIFT;
+ /*
+ * Don't use force here, it's convenient if the signal
+ * can be temporarily blocked.
+ * This could cause a loop when the user sets SIGBUS
+ * to SIG_IGN, but hopefully no one will do that?
+ */
+ ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
+ if (ret < 0)
+ printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
+ t->comm, t->pid, ret);
+ return ret;
+}
+
+/*
+ * Kill all processes that have a poisoned page mapped and then isolate
+ * the page.
+ *
+ * General strategy:
+ * Find all processes having the page mapped and kill them.
+ * But we keep a page reference around so that the page is not
+ * actually freed yet.
+ * Then stash the page away
+ *
+ * There's no convenient way to get back to mapped processes
+ * from the VMAs. So do a brute-force search over all
+ * running processes.
+ *
+ * Remember that machine checks are not common (or rather
+ * if they are common you have other problems), so this shouldn't
+ * be a performance issue.
+ *
+ * Also there are some races possible while we get from the
+ * error detection to actually handle it.
+ */
+
+struct to_kill {
+ struct list_head nd;
+ struct task_struct *tsk;
+ unsigned long addr;
+ unsigned addr_valid:1;
+};
+
+/*
+ * Failure handling: if we can't find or can't kill a process there's
+ * not much we can do. We just print a message and ignore otherwise.
+ */
+
+/*
+ * Schedule a process for later kill.
+ * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
+ * TBD would GFP_NOIO be enough?
+ */
+static void add_to_kill(struct task_struct *tsk, struct page *p,
+ struct vm_area_struct *vma,
+ struct list_head *to_kill,
+ struct to_kill **tkc)
+{
+ struct to_kill *tk;
+
+ if (*tkc) {
+ tk = *tkc;
+ *tkc = NULL;
+ } else {
+ tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
+ if (!tk) {
+ printk(KERN_ERR
+ "MCE: Out of memory while machine check handling\n");
+ return;
+ }
+ }
+ tk->addr = page_address_in_vma(p, vma);
+ tk->addr_valid = 1;
+
+ /*
+ * In theory we don't have to kill when the page was
+ * munmapped. But it could also be a mremap. Since that's
+ * likely very rare kill anyways just out of paranoia, but use
+ * a SIGKILL because the error is not contained anymore.
+ */
+ if (tk->addr == -EFAULT) {
+ pr_debug("MCE: Unable to find user space address %lx in %s\n",
+ page_to_pfn(p), tsk->comm);
+ tk->addr_valid = 0;
+ }
+ get_task_struct(tsk);
+ tk->tsk = tsk;
+ list_add_tail(&tk->nd, to_kill);
+}
+
+/*
+ * Kill the processes that have been collected earlier.
+ *
+ * Only do anything when DOIT is set, otherwise just free the list
+ * (this is used for clean pages which do not need killing)
+ * Also when FAIL is set do a force kill because something went
+ * wrong earlier.
+ */
+static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
+ int fail, unsigned long pfn)
+{
+ struct to_kill *tk, *next;
+
+ list_for_each_entry_safe (tk, next, to_kill, nd) {
+ if (doit) {
+ /*
+ * In case something went wrong with munmapping
+ * make sure the process doesn't catch the
+ * signal in its signal handlers and then
+ * access the memory. Just kill it.
+ */
+ if (fail || tk->addr_valid == 0) {
+ printk(KERN_ERR
+ "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
+ pfn, tk->tsk->comm, tk->tsk->pid);
+ force_sig(SIGKILL, tk->tsk);
+ }
+
+ /*
+ * In theory the process could have mapped
+ * something else on the address in-between. We could
+ * check for that, but we need to tell the
+ * process anyways.
+ */
+ else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
+ pfn) < 0)
+ printk(KERN_ERR
+ "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
+ pfn, tk->tsk->comm, tk->tsk->pid);
+ }
+ put_task_struct(tk->tsk);
+ kfree(tk);
+ }
+}
+
+static int task_early_kill(struct task_struct *tsk)
+{
+ if (!tsk->mm)
+ return 0;
+ if (tsk->flags & PF_MCE_PROCESS)
+ return !!(tsk->flags & PF_MCE_EARLY);
+ return sysctl_memory_failure_early_kill;
+}
+
+/*
+ * Collect processes when the error hit an anonymous page.
+ */
+static void collect_procs_anon(struct page *page, struct list_head *to_kill,
+ struct to_kill **tkc)
+{
+ struct vm_area_struct *vma;
+ struct task_struct *tsk;
+ struct anon_vma *av;
+
+ read_lock(&tasklist_lock);
+ av = page_lock_anon_vma(page);
+ if (av == NULL) /* Not actually mapped anymore */
+ goto out;
+ for_each_process (tsk) {
+ if (!task_early_kill(tsk))
+ continue;
+ list_for_each_entry (vma, &av->head, anon_vma_node) {
+ if (!page_mapped_in_vma(page, vma))
+ continue;
+ if (vma->vm_mm == tsk->mm)
+ add_to_kill(tsk, page, vma, to_kill, tkc);
+ }
+ }
+ page_unlock_anon_vma(av);
+out:
+ read_unlock(&tasklist_lock);
+}
+
+/*
+ * Collect processes when the error hit a file mapped page.
+ */
+static void collect_procs_file(struct page *page, struct list_head *to_kill,
+ struct to_kill **tkc)
+{
+ struct vm_area_struct *vma;
+ struct task_struct *tsk;
+ struct prio_tree_iter iter;
+ struct address_space *mapping = page->mapping;
+
+ /*
+ * A note on the locking order between the two locks.
+ * We don't rely on this particular order.
+ * If you have some other code that needs a different order
+ * feel free to switch them around. Or add a reverse link
+ * from mm_struct to task_struct, then this could be all
+ * done without taking tasklist_lock and looping over all tasks.
+ */
+
+ read_lock(&tasklist_lock);
+ spin_lock(&mapping->i_mmap_lock);
+ for_each_process(tsk) {
+ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+ if (!task_early_kill(tsk))
+ continue;
+
+ vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff,
+ pgoff) {
+ /*
+ * Send early kill signal to tasks where a vma covers
+ * the page but the corrupted page is not necessarily
+ * mapped in its pte.
+ * Assume applications who requested early kill want
+ * to be informed of all such data corruptions.
+ */
+ if (vma->vm_mm == tsk->mm)
+ add_to_kill(tsk, page, vma, to_kill, tkc);
+ }
+ }
+ spin_unlock(&mapping->i_mmap_lock);
+ read_unlock(&tasklist_lock);
+}
+
+/*
+ * Collect the processes who have the corrupted page mapped to kill.
+ * This is done in two steps for locking reasons.
+ * First preallocate one tokill structure outside the spin locks,
+ * so that we can kill at least one process reasonably reliably.
+ */
+static void collect_procs(struct page *page, struct list_head *tokill)
+{
+ struct to_kill *tk;
+
+ if (!page->mapping)
+ return;
+
+ tk = kmalloc(sizeof(struct to_kill), GFP_NOIO);
+ if (!tk)
+ return;
+ if (PageAnon(page))
+ collect_procs_anon(page, tokill, &tk);
+ else
+ collect_procs_file(page, tokill, &tk);
+ kfree(tk);
+}
+
+/*
+ * Error handlers for various types of pages.
+ */
+
+enum outcome {
+ FAILED, /* Error handling failed */
+ DELAYED, /* Will be handled later */
+ IGNORED, /* Error safely ignored */
+ RECOVERED, /* Successfully recovered */
+};
+
+static const char *action_name[] = {
+ [FAILED] = "Failed",
+ [DELAYED] = "Delayed",
+ [IGNORED] = "Ignored",
+ [RECOVERED] = "Recovered",
+};
+
+/*
+ * Error hit kernel page.
+ * Do nothing, try to be lucky and not touch this instead. For a few cases we
+ * could be more sophisticated.
+ */
+static int me_kernel(struct page *p, unsigned long pfn)
+{
+ return DELAYED;
+}
+
+/*
+ * Already poisoned page.
+ */
+static int me_ignore(struct page *p, unsigned long pfn)
+{
+ return IGNORED;
+}
+
+/*
+ * Page in unknown state. Do nothing.
+ */
+static int me_unknown(struct page *p, unsigned long pfn)
+{
+ printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn);
+ return FAILED;
+}
+
+/*
+ * Free memory
+ */
+static int me_free(struct page *p, unsigned long pfn)
+{
+ return DELAYED;
+}
+
+/*
+ * Clean (or cleaned) page cache page.
+ */
+static int me_pagecache_clean(struct page *p, unsigned long pfn)
+{
+ int err;
+ int ret = FAILED;
+ struct address_space *mapping;
+
+ if (!isolate_lru_page(p))
+ page_cache_release(p);
+
+ /*
+ * For anonymous pages we're done: the only reference left
+ * should be the one m_f() holds.
+ */
+ if (PageAnon(p))
+ return RECOVERED;
+
+ /*
+ * Now truncate the page in the page cache. This is really
+ * more like a "temporary hole punch"
+ * Don't do this for block devices when someone else
+ * has a reference, because it could be file system metadata
+ * and that's not safe to truncate.
+ */
+ mapping = page_mapping(p);
+ if (!mapping) {
+ /*
+ * Page has been torn down in the meantime
+ */
+ return FAILED;
+ }
+
+ /*
+ * Truncation is a bit tricky. Enable it per file system for now.
+ *
+ * Open: to take i_mutex or not for this? Right now we don't.
+ */
+ if (mapping->a_ops->error_remove_page) {
+ err = mapping->a_ops->error_remove_page(mapping, p);
+ if (err != 0) {
+ printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n",
+ pfn, err);
+ } else if (page_has_private(p) &&
+ !try_to_release_page(p, GFP_NOIO)) {
+ pr_debug("MCE %#lx: failed to release buffers\n", pfn);
+ } else {
+ ret = RECOVERED;
+ }
+ } else {
+ /*
+ * If the file system doesn't support it just invalidate
+ * This fails on dirty or anything with private pages
+ */
+ if (invalidate_inode_page(p))
+ ret = RECOVERED;
+ else
+ printk(KERN_INFO "MCE %#lx: Failed to invalidate\n",
+ pfn);
+ }
+ return ret;
+}
+
+/*
+ * Dirty page cache page
+ * Issues: when the error hit a hole page the error is not properly
+ * propagated.
+ */
+static int me_pagecache_dirty(struct page *p, unsigned long pfn)
+{
+ struct address_space *mapping = page_mapping(p);
+
+ SetPageError(p);
+ /* TBD: print more information about the file. */
+ if (mapping) {
+ /*
+ * IO error will be reported by write(), fsync(), etc.
+ * who check the mapping.
+ * This way the application knows that something went
+ * wrong with its dirty file data.
+ *
+ * There's one open issue:
+ *
+ * The EIO will be only reported on the next IO
+ * operation and then cleared through the IO map.
+ * Normally Linux has two mechanisms to pass IO error
+ * first through the AS_EIO flag in the address space
+ * and then through the PageError flag in the page.
+ * Since we drop pages on memory failure handling the
+ * only mechanism open to use is through AS_EIO.
+ *
+ * This has the disadvantage that it gets cleared on
+ * the first operation that returns an error, while
+ * the PageError bit is more sticky and only cleared
+ * when the page is reread or dropped. If an
+ * application assumes it will always get error on
+ * fsync, but does other operations on the fd before
+ * and the page is dropped inbetween then the error
+ * will not be properly reported.
+ *
+ * This can already happen even without hwpoisoned
+ * pages: first on metadata IO errors (which only
+ * report through AS_EIO) or when the page is dropped
+ * at the wrong time.
+ *
+ * So right now we assume that the application DTRT on
+ * the first EIO, but we're not worse than other parts
+ * of the kernel.
+ */
+ mapping_set_error(mapping, EIO);
+ }
+
+ return me_pagecache_clean(p, pfn);
+}
+
+/*
+ * Clean and dirty swap cache.
+ *
+ * Dirty swap cache page is tricky to handle. The page could live both in page
+ * cache and swap cache(ie. page is freshly swapped in). So it could be
+ * referenced concurrently by 2 types of PTEs:
+ * normal PTEs and swap PTEs. We try to handle them consistently by calling
+ * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs,
+ * and then
+ * - clear dirty bit to prevent IO
+ * - remove from LRU
+ * - but keep in the swap cache, so that when we return to it on
+ * a later page fault, we know the application is accessing
+ * corrupted data and shall be killed (we installed simple
+ * interception code in do_swap_page to catch it).
+ *
+ * Clean swap cache pages can be directly isolated. A later page fault will
+ * bring in the known good data from disk.
+ */
+static int me_swapcache_dirty(struct page *p, unsigned long pfn)
+{
+ int ret = FAILED;
+
+ ClearPageDirty(p);
+ /* Trigger EIO in shmem: */
+ ClearPageUptodate(p);
+
+ if (!isolate_lru_page(p)) {
+ page_cache_release(p);
+ ret = DELAYED;
+ }
+
+ return ret;
+}
+
+static int me_swapcache_clean(struct page *p, unsigned long pfn)
+{
+ int ret = FAILED;
+
+ if (!isolate_lru_page(p)) {
+ page_cache_release(p);
+ ret = RECOVERED;
+ }
+ delete_from_swap_cache(p);
+ return ret;
+}
+
+/*
+ * Huge pages. Needs work.
+ * Issues:
+ * No rmap support so we cannot find the original mapper. In theory could walk
+ * all MMs and look for the mappings, but that would be non atomic and racy.
+ * Need rmap for hugepages for this. Alternatively we could employ a heuristic,
+ * like just walking the current process and hoping it has it mapped (that
+ * should be usually true for the common "shared database cache" case)
+ * Should handle free huge pages and dequeue them too, but this needs to
+ * handle huge page accounting correctly.
+ */
+static int me_huge_page(struct page *p, unsigned long pfn)
+{
+ return FAILED;
+}
+
+/*
+ * Various page states we can handle.
+ *
+ * A page state is defined by its current page->flags bits.
+ * The table matches them in order and calls the right handler.
+ *
+ * This is quite tricky because we can access page at any time
+ * in its life cycle, so all accesses have to be extremely careful.
+ *
+ * This is not complete. More states could be added.
+ * For any missing state don't attempt recovery.
+ */
+
+#define dirty (1UL << PG_dirty)
+#define sc (1UL << PG_swapcache)
+#define unevict (1UL << PG_unevictable)
+#define mlock (1UL << PG_mlocked)
+#define writeback (1UL << PG_writeback)
+#define lru (1UL << PG_lru)
+#define swapbacked (1UL << PG_swapbacked)
+#define head (1UL << PG_head)
+#define tail (1UL << PG_tail)
+#define compound (1UL << PG_compound)
+#define slab (1UL << PG_slab)
+#define buddy (1UL << PG_buddy)
+#define reserved (1UL << PG_reserved)
+
+static struct page_state {
+ unsigned long mask;
+ unsigned long res;
+ char *msg;
+ int (*action)(struct page *p, unsigned long pfn);
+} error_states[] = {
+ { reserved, reserved, "reserved kernel", me_ignore },
+ { buddy, buddy, "free kernel", me_free },
+
+ /*
+ * Could in theory check if slab page is free or if we can drop
+ * currently unused objects without touching them. But just
+ * treat it as standard kernel for now.
+ */
+ { slab, slab, "kernel slab", me_kernel },
+
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+ { head, head, "huge", me_huge_page },
+ { tail, tail, "huge", me_huge_page },
+#else
+ { compound, compound, "huge", me_huge_page },
+#endif
+
+ { sc|dirty, sc|dirty, "swapcache", me_swapcache_dirty },
+ { sc|dirty, sc, "swapcache", me_swapcache_clean },
+
+ { unevict|dirty, unevict|dirty, "unevictable LRU", me_pagecache_dirty},
+ { unevict, unevict, "unevictable LRU", me_pagecache_clean},
+
+#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
+ { mlock|dirty, mlock|dirty, "mlocked LRU", me_pagecache_dirty },
+ { mlock, mlock, "mlocked LRU", me_pagecache_clean },
+#endif
+
+ { lru|dirty, lru|dirty, "LRU", me_pagecache_dirty },
+ { lru|dirty, lru, "clean LRU", me_pagecache_clean },
+ { swapbacked, swapbacked, "anonymous", me_pagecache_clean },
+
+ /*
+ * Catchall entry: must be at end.
+ */
+ { 0, 0, "unknown page state", me_unknown },
+};
+
+#undef lru
+
+static void action_result(unsigned long pfn, char *msg, int result)
+{
+ struct page *page = NULL;
+ if (pfn_valid(pfn))
+ page = pfn_to_page(pfn);
+
+ printk(KERN_ERR "MCE %#lx: %s%s page recovery: %s\n",
+ pfn,
+ page && PageDirty(page) ? "dirty " : "",
+ msg, action_name[result]);
+}
+
+static int page_action(struct page_state *ps, struct page *p,
+ unsigned long pfn, int ref)
+{
+ int result;
+
+ result = ps->action(p, pfn);
+ action_result(pfn, ps->msg, result);
+ if (page_count(p) != 1 + ref)
+ printk(KERN_ERR
+ "MCE %#lx: %s page still referenced by %d users\n",
+ pfn, ps->msg, page_count(p) - 1);
+
+ /* Could do more checks here if page looks ok */
+ /*
+ * Could adjust zone counters here to correct for the missing page.
+ */
+
+ return result == RECOVERED ? 0 : -EBUSY;
+}
+
+#define N_UNMAP_TRIES 5
+
+/*
+ * Do all that is necessary to remove user space mappings. Unmap
+ * the pages and send SIGBUS to the processes if the data was dirty.
+ */
+static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
+ int trapno)
+{
+ enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+ struct address_space *mapping;
+ LIST_HEAD(tokill);
+ int ret;
+ int i;
+ int kill = 1;
+
+ if (PageReserved(p) || PageCompound(p) || PageSlab(p))
+ return;
+
+ if (!PageLRU(p))
+ lru_add_drain_all();
+
+ /*
+ * This check implies we don't kill processes if their pages
+ * are in the swap cache early. Those are always late kills.
+ */
+ if (!page_mapped(p))
+ return;
+
+ if (PageSwapCache(p)) {
+ printk(KERN_ERR
+ "MCE %#lx: keeping poisoned page in swap cache\n", pfn);
+ ttu |= TTU_IGNORE_HWPOISON;
+ }
+
+ /*
+ * Propagate the dirty bit from PTEs to struct page first, because we
+ * need this to decide if we should kill or just drop the page.
+ */
+ mapping = page_mapping(p);
+ if (!PageDirty(p) && mapping && mapping_cap_writeback_dirty(mapping)) {
+ if (page_mkclean(p)) {
+ SetPageDirty(p);
+ } else {
+ kill = 0;
+ ttu |= TTU_IGNORE_HWPOISON;
+ printk(KERN_INFO
+ "MCE %#lx: corrupted page was clean: dropped without side effects\n",
+ pfn);
+ }
+ }
+
+ /*
+ * First collect all the processes that have the page
+ * mapped in dirty form. This has to be done before try_to_unmap,
+ * because ttu takes the rmap data structures down.
+ *
+ * Error handling: We ignore errors here because
+ * there's nothing that can be done.
+ */
+ if (kill)
+ collect_procs(p, &tokill);
+
+ /*
+ * try_to_unmap can fail temporarily due to races.
+ * Try a few times (RED-PEN better strategy?)
+ */
+ for (i = 0; i < N_UNMAP_TRIES; i++) {
+ ret = try_to_unmap(p, ttu);
+ if (ret == SWAP_SUCCESS)
+ break;
+ pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn, ret);
+ }
+
+ if (ret != SWAP_SUCCESS)
+ printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
+ pfn, page_mapcount(p));
+
+ /*
+ * Now that the dirty bit has been propagated to the
+ * struct page and all unmaps done we can decide if
+ * killing is needed or not. Only kill when the page
+ * was dirty, otherwise the tokill list is merely
+ * freed. When there was a problem unmapping earlier
+ * use a more forceful uncatchable kill to prevent
+ * any accesses to the poisoned memory.
+ */
+ kill_procs_ao(&tokill, !!PageDirty(p), trapno,
+ ret != SWAP_SUCCESS, pfn);
+}
+
+int __memory_failure(unsigned long pfn, int trapno, int ref)
+{
+ struct page_state *ps;
+ struct page *p;
+ int res;
+
+ if (!sysctl_memory_failure_recovery)
+ panic("Memory failure from trap %d on page %lx", trapno, pfn);
+
+ if (!pfn_valid(pfn)) {
+ action_result(pfn, "memory outside kernel control", IGNORED);
+ return -EIO;
+ }
+
+ p = pfn_to_page(pfn);
+ if (TestSetPageHWPoison(p)) {
+ action_result(pfn, "already hardware poisoned", IGNORED);
+ return 0;
+ }
+
+ atomic_long_add(1, &mce_bad_pages);
+
+ /*
+ * We need/can do nothing about count=0 pages.
+ * 1) it's a free page, and therefore in safe hands:
+ * prep_new_page() will be the gate keeper.
+ * 2) it's part of a non-compound high order page.
+ * Implies some kernel user: cannot stop them from
+ * R/W the page; let's pray that the page has been
+ * used and will be freed some time later.
+ * In fact it's dangerous to directly bump up page count from 0,
+ * that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
+ */
+ if (!get_page_unless_zero(compound_head(p))) {
+ action_result(pfn, "free or high order kernel", IGNORED);
+ return PageBuddy(compound_head(p)) ? 0 : -EBUSY;
+ }
+
+ /*
+ * Lock the page and wait for writeback to finish.
+ * It's very difficult to mess with pages currently under IO
+ * and in many cases impossible, so we just avoid it here.
+ */
+ lock_page_nosync(p);
+ wait_on_page_writeback(p);
+
+ /*
+ * Now take care of user space mappings.
+ */
+ hwpoison_user_mappings(p, pfn, trapno);
+
+ /*
+ * Torn down by someone else?
+ */
+ if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
+ action_result(pfn, "already truncated LRU", IGNORED);
+ res = 0;
+ goto out;
+ }
+
+ res = -EBUSY;
+ for (ps = error_states;; ps++) {
+ if ((p->flags & ps->mask) == ps->res) {
+ res = page_action(ps, p, pfn, ref);
+ break;
+ }
+ }
+out:
+ unlock_page(p);
+ return res;
+}
+EXPORT_SYMBOL_GPL(__memory_failure);
+
+/**
+ * memory_failure - Handle memory failure of a page.
+ * @pfn: Page Number of the corrupted page
+ * @trapno: Trap number reported in the signal to user space.
+ *
+ * This function is called by the low level machine check code
+ * of an architecture when it detects hardware memory corruption
+ * of a page. It tries its best to recover, which includes
+ * dropping pages, killing processes etc.
+ *
+ * The function is primarily of use for corruptions that
+ * happen outside the current execution context (e.g. when
+ * detected by a background scrubber)
+ *
+ * Must run in process context (e.g. a work queue) with interrupts
+ * enabled and no spinlocks held.
+ */
+void memory_failure(unsigned long pfn, int trapno)
+{
+ __memory_failure(pfn, trapno, 0);
+}
diff --git a/mm/memory.c b/mm/memory.c
index b1443ac07c0..7e91b5f9f69 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long addr = vma->vm_start;
/*
- * Hide vma from rmap and vmtruncate before freeing pgtables
+ * Hide vma from rmap and truncate_pagecache before freeing
+ * pgtables
*/
anon_vma_unlink(vma);
unlink_file_vma(vma);
@@ -1325,7 +1326,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
return i ? i : -ENOMEM;
- else if (ret & VM_FAULT_SIGBUS)
+ if (ret &
+ (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
return i ? i : -EFAULT;
BUG();
}
@@ -2407,7 +2409,7 @@ restart:
* @mapping: the address space containing mmaps to be unmapped.
* @holebegin: byte in first page to unmap, relative to the start of
* the underlying file. This will be rounded down to a PAGE_SIZE
- * boundary. Note that this is different from vmtruncate(), which
+ * boundary. Note that this is different from truncate_pagecache(), which
* must keep the partial page. In contrast, we must get rid of
* partial pages.
* @holelen: size of prospective hole in bytes. This will be rounded
@@ -2458,63 +2460,6 @@ void unmap_mapping_range(struct address_space *mapping,
}
EXPORT_SYMBOL(unmap_mapping_range);
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @offset: file offset to start truncating
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page. Ugly, but necessary.
- */
-int vmtruncate(struct inode * inode, loff_t offset)
-{
- if (inode->i_size < offset) {
- unsigned long limit;
-
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && offset > limit)
- goto out_sig;
- if (offset > inode->i_sb->s_maxbytes)
- goto out_big;
- i_size_write(inode, offset);
- } else {
- struct address_space *mapping = inode->i_mapping;
-
- /*
- * truncation of in-use swapfiles is disallowed - it would
- * cause subsequent swapout to scribble on the now-freed
- * blocks.
- */
- if (IS_SWAPFILE(inode))
- return -ETXTBSY;
- i_size_write(inode, offset);
-
- /*
- * unmap_mapping_range is called twice, first simply for
- * efficiency so that truncate_inode_pages does fewer
- * single-page unmaps. However after this first call, and
- * before truncate_inode_pages finishes, it is possible for
- * private pages to be COWed, which remain after
- * truncate_inode_pages finishes, hence the second
- * unmap_mapping_range call must be made for correctness.
- */
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(mapping, offset);
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- }
-
- if (inode->i_op->truncate)
- inode->i_op->truncate(inode);
- return 0;
-
-out_sig:
- send_sig(SIGXFSZ, current, 0);
-out_big:
- return -EFBIG;
-}
-EXPORT_SYMBOL(vmtruncate);
-
int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
{
struct address_space *mapping = inode->i_mapping;
@@ -2559,8 +2504,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out;
entry = pte_to_swp_entry(orig_pte);
- if (is_migration_entry(entry)) {
- migration_entry_wait(mm, pmd, address);
+ if (unlikely(non_swap_entry(entry))) {
+ if (is_migration_entry(entry)) {
+ migration_entry_wait(mm, pmd, address);
+ } else if (is_hwpoison_entry(entry)) {
+ ret = VM_FAULT_HWPOISON;
+ } else {
+ print_bad_pte(vma, address, orig_pte, NULL);
+ ret = VM_FAULT_OOM;
+ }
goto out;
}
delayacct_set_flag(DELAYACCT_PF_SWAPIN);
@@ -2584,6 +2536,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
+ } else if (PageHWPoison(page)) {
+ ret = VM_FAULT_HWPOISON;
+ delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+ goto out;
}
lock_page(page);
@@ -2760,6 +2716,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
return ret;
+ if (unlikely(PageHWPoison(vmf.page))) {
+ if (ret & VM_FAULT_LOCKED)
+ unlock_page(vmf.page);
+ return VM_FAULT_HWPOISON;
+ }
+
/*
* For consistency in subsequent calls, make the faulted page always
* locked.
diff --git a/mm/migrate.c b/mm/migrate.c
index 16052e80aaa..1a4bf481378 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -675,7 +675,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
}
/* Establish migration ptes or remove ptes */
- try_to_unmap(page, 1);
+ try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
skip_unmap:
if (!page_mapped(page))
diff --git a/mm/mremap.c b/mm/mremap.c
index 20a07dba6be..97bff254771 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
if (vma->vm_file) {
/*
* Subtle point from Rajesh Venkatasubramanian: before
- * moving file-based ptes, we must lock vmtruncate out,
- * since it might clean the dst vma before the src vma,
+ * moving file-based ptes, we must lock truncate_pagecache
+ * out, since it might clean the dst vma before the src vma,
* and we propagate stale pages into the dst afterward.
*/
mapping = vma->vm_file->f_mapping;
diff --git a/mm/nommu.c b/mm/nommu.c
index 8d484241d03..56a446f0597 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -83,46 +83,6 @@ struct vm_operations_struct generic_file_vm_ops = {
};
/*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page. Ugly, but necessary.
- */
-int vmtruncate(struct inode *inode, loff_t offset)
-{
- struct address_space *mapping = inode->i_mapping;
- unsigned long limit;
-
- if (inode->i_size < offset)
- goto do_expand;
- i_size_write(inode, offset);
-
- truncate_inode_pages(mapping, offset);
- goto out_truncate;
-
-do_expand:
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && offset > limit)
- goto out_sig;
- if (offset > inode->i_sb->s_maxbytes)
- goto out;
- i_size_write(inode, offset);
-
-out_truncate:
- if (inode->i_op->truncate)
- inode->i_op->truncate(inode);
- return 0;
-out_sig:
- send_sig(SIGXFSZ, current, 0);
-out:
- return -EFBIG;
-}
-
-EXPORT_SYMBOL(vmtruncate);
-
-/*
* Return the total memory allocated for this pointer, not
* just what the caller asked for.
*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5f378dd5880..d99664e8607 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -155,37 +155,37 @@ static void update_completion_period(void)
}
int dirty_background_ratio_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
- ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write)
dirty_background_bytes = 0;
return ret;
}
int dirty_background_bytes_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
- ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write)
dirty_background_ratio = 0;
return ret;
}
int dirty_ratio_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int old_ratio = vm_dirty_ratio;
int ret;
- ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
update_completion_period();
vm_dirty_bytes = 0;
@@ -195,13 +195,13 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
int dirty_bytes_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
unsigned long old_bytes = vm_dirty_bytes;
int ret;
- ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
update_completion_period();
vm_dirty_ratio = 0;
@@ -686,9 +686,9 @@ static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
* sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
*/
int dirty_writeback_centisecs_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec(table, write, file, buffer, length, ppos);
+ proc_dointvec(table, write, buffer, length, ppos);
return 0;
}
@@ -1149,6 +1149,13 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
EXPORT_SYMBOL(redirty_page_for_writepage);
/*
+ * Dirty a page.
+ *
+ * For pages with a mapping this should be done under the page lock
+ * for the benefit of asynchronous memory error handling, which prefers a
+ * consistent dirty state. This rule can be broken in some special cases,
+ * but it is better not to.
+ *
* If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads.
*/
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5717f27a070..bf720550b44 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -234,6 +234,12 @@ static void bad_page(struct page *page)
static unsigned long nr_shown;
static unsigned long nr_unshown;
+ /* Don't complain about poisoned pages */
+ if (PageHWPoison(page)) {
+ __ClearPageBuddy(page);
+ return;
+ }
+
/*
* Allow a burst of 60 reports, then keep quiet for that minute;
* or allow a steady drip of one report per second.
@@ -666,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page,
/*
* This page is about to be returned from the page allocator
*/
-static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+static inline int check_new_page(struct page *page)
{
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
@@ -675,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
bad_page(page);
return 1;
}
+ return 0;
+}
+
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+{
+ int i;
+
+ for (i = 0; i < (1 << order); i++) {
+ struct page *p = page + i;
+ if (unlikely(check_new_page(p)))
+ return 1;
+ }
set_page_private(page, 0);
set_page_refcounted(page);
@@ -2373,7 +2391,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order);
* sysctl handler for numa_zonelist_order
*/
int numa_zonelist_order_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length,
+ void __user *buffer, size_t *length,
loff_t *ppos)
{
char saved_string[NUMA_ZONELIST_ORDER_LEN];
@@ -2382,7 +2400,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
if (write)
strncpy(saved_string, (char*)table->data,
NUMA_ZONELIST_ORDER_LEN);
- ret = proc_dostring(table, write, file, buffer, length, ppos);
+ ret = proc_dostring(table, write, buffer, length, ppos);
if (ret)
return ret;
if (write) {
@@ -4706,9 +4724,9 @@ module_init(init_per_zone_wmark_min)
* changes.
*/
int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec(table, write, file, buffer, length, ppos);
+ proc_dointvec(table, write, buffer, length, ppos);
if (write)
setup_per_zone_wmarks();
return 0;
@@ -4716,12 +4734,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
#ifdef CONFIG_NUMA
int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
int rc;
- rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;
@@ -4732,12 +4750,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
}
int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
int rc;
- rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;
@@ -4758,9 +4776,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
* if in function of the boot time zone sizes.
*/
int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ proc_dointvec_minmax(table, write, buffer, length, ppos);
setup_per_zone_lowmem_reserve();
return 0;
}
@@ -4772,13 +4790,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
*/
int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
unsigned int cpu;
int ret;
- ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (!write || (ret == -EINVAL))
return ret;
for_each_populated_zone(zone) {
diff --git a/mm/quicklist.c b/mm/quicklist.c
index 6eedf7e473d..6633965bb27 100644
--- a/mm/quicklist.c
+++ b/mm/quicklist.c
@@ -29,7 +29,6 @@ static unsigned long max_pages(unsigned long min_pages)
int node = numa_node_id();
struct zone *zones = NODE_DATA(node)->node_zones;
int num_cpus_on_node;
- const struct cpumask *cpumask_on_node = cpumask_of_node(node);
node_free_pages =
#ifdef CONFIG_ZONE_DMA
@@ -42,7 +41,7 @@ static unsigned long max_pages(unsigned long min_pages)
max = node_free_pages / FRACTION_OF_NODE_MEM;
- num_cpus_on_node = cpus_weight_nr(*cpumask_on_node);
+ num_cpus_on_node = cpumask_weight(cpumask_of_node(node));
max /= num_cpus_on_node;
return max(max, min_pages);
diff --git a/mm/rmap.c b/mm/rmap.c
index 720fc03a7bc..28aafe2b530 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -36,6 +36,11 @@
* mapping->tree_lock (widely used, in set_page_dirty,
* in arch-dependent flush_dcache_mmap_lock,
* within inode_lock in __sync_single_inode)
+ *
+ * (code doesn't rely on that order so it could be switched around)
+ * ->tasklist_lock
+ * anon_vma->lock (memory_failure, collect_procs_anon)
+ * pte map lock
*/
#include <linux/mm.h>
@@ -191,7 +196,7 @@ void __init anon_vma_init(void)
* Getting a lock on a stable anon_vma from a page off the LRU is
* tricky: page_lock_anon_vma rely on RCU to guard against the races.
*/
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
{
struct anon_vma *anon_vma;
unsigned long anon_mapping;
@@ -211,7 +216,7 @@ out:
return NULL;
}
-static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
spin_unlock(&anon_vma->lock);
rcu_read_unlock();
@@ -311,7 +316,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
* if the page is not mapped into the page tables of this VMA. Only
* valid for normal file or anonymous VMAs.
*/
-static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
unsigned long address;
pte_t *pte;
@@ -756,7 +761,7 @@ void page_remove_rmap(struct page *page)
* repeatedly from either try_to_unmap_anon or try_to_unmap_file.
*/
static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
- int migration)
+ enum ttu_flags flags)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
@@ -778,11 +783,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* If it's recently referenced (perhaps page_referenced
* skipped over this mm) then we should reactivate it.
*/
- if (!migration) {
+ if (!(flags & TTU_IGNORE_MLOCK)) {
if (vma->vm_flags & VM_LOCKED) {
ret = SWAP_MLOCK;
goto out_unmap;
}
+ }
+ if (!(flags & TTU_IGNORE_ACCESS)) {
if (ptep_clear_flush_young_notify(vma, address, pte)) {
ret = SWAP_FAIL;
goto out_unmap;
@@ -800,7 +807,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
/* Update high watermark before we lower rss */
update_hiwater_rss(mm);
- if (PageAnon(page)) {
+ if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+ if (PageAnon(page))
+ dec_mm_counter(mm, anon_rss);
+ else
+ dec_mm_counter(mm, file_rss);
+ set_pte_at(mm, address, pte,
+ swp_entry_to_pte(make_hwpoison_entry(page)));
+ } else if (PageAnon(page)) {
swp_entry_t entry = { .val = page_private(page) };
if (PageSwapCache(page)) {
@@ -822,12 +836,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* pte. do_swap_page() will wait until the migration
* pte is removed and then restart fault handling.
*/
- BUG_ON(!migration);
+ BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
entry = make_migration_entry(page, pte_write(pteval));
}
set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
BUG_ON(pte_file(*pte));
- } else if (PAGE_MIGRATION && migration) {
+ } else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) {
/* Establish migration entry for a file page */
swp_entry_t entry;
entry = make_migration_entry(page, pte_write(pteval));
@@ -996,12 +1010,13 @@ static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
* vm_flags for that VMA. That should be OK, because that vma shouldn't be
* 'LOCKED.
*/
-static int try_to_unmap_anon(struct page *page, int unlock, int migration)
+static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
{
struct anon_vma *anon_vma;
struct vm_area_struct *vma;
unsigned int mlocked = 0;
int ret = SWAP_AGAIN;
+ int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
if (MLOCK_PAGES && unlikely(unlock))
ret = SWAP_SUCCESS; /* default for try_to_munlock() */
@@ -1017,7 +1032,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
continue; /* must visit all unlocked vmas */
ret = SWAP_MLOCK; /* saw at least one mlocked vma */
} else {
- ret = try_to_unmap_one(page, vma, migration);
+ ret = try_to_unmap_one(page, vma, flags);
if (ret == SWAP_FAIL || !page_mapped(page))
break;
}
@@ -1041,8 +1056,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
/**
* try_to_unmap_file - unmap/unlock file page using the object-based rmap method
* @page: the page to unmap/unlock
- * @unlock: request for unlock rather than unmap [unlikely]
- * @migration: unmapping for migration - ignored if @unlock
+ * @flags: action and flags
*
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the address_space struct it points to.
@@ -1054,7 +1068,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
* vm_flags for that VMA. That should be OK, because that vma shouldn't be
* 'LOCKED.
*/
-static int try_to_unmap_file(struct page *page, int unlock, int migration)
+static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
{
struct address_space *mapping = page->mapping;
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -1066,6 +1080,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
unsigned long max_nl_size = 0;
unsigned int mapcount;
unsigned int mlocked = 0;
+ int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
if (MLOCK_PAGES && unlikely(unlock))
ret = SWAP_SUCCESS; /* default for try_to_munlock() */
@@ -1078,7 +1093,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
continue; /* must visit all vmas */
ret = SWAP_MLOCK;
} else {
- ret = try_to_unmap_one(page, vma, migration);
+ ret = try_to_unmap_one(page, vma, flags);
if (ret == SWAP_FAIL || !page_mapped(page))
goto out;
}
@@ -1103,7 +1118,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
ret = SWAP_MLOCK; /* leave mlocked == 0 */
goto out; /* no need to look further */
}
- if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED))
+ if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
+ (vma->vm_flags & VM_LOCKED))
continue;
cursor = (unsigned long) vma->vm_private_data;
if (cursor > max_nl_cursor)
@@ -1137,7 +1153,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
do {
list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
shared.vm_set.list) {
- if (!MLOCK_PAGES && !migration &&
+ if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
(vma->vm_flags & VM_LOCKED))
continue;
cursor = (unsigned long) vma->vm_private_data;
@@ -1177,7 +1193,7 @@ out:
/**
* try_to_unmap - try to remove all page table mappings to a page
* @page: the page to get unmapped
- * @migration: migration flag
+ * @flags: action and flags
*
* Tries to remove all the page table entries which are mapping this
* page, used in the pageout path. Caller must hold the page lock.
@@ -1188,16 +1204,16 @@ out:
* SWAP_FAIL - the page is unswappable
* SWAP_MLOCK - page is mlocked.
*/
-int try_to_unmap(struct page *page, int migration)
+int try_to_unmap(struct page *page, enum ttu_flags flags)
{
int ret;
BUG_ON(!PageLocked(page));
if (PageAnon(page))
- ret = try_to_unmap_anon(page, 0, migration);
+ ret = try_to_unmap_anon(page, flags);
else
- ret = try_to_unmap_file(page, 0, migration);
+ ret = try_to_unmap_file(page, flags);
if (ret != SWAP_MLOCK && !page_mapped(page))
ret = SWAP_SUCCESS;
return ret;
@@ -1222,8 +1238,8 @@ int try_to_munlock(struct page *page)
VM_BUG_ON(!PageLocked(page) || PageLRU(page));
if (PageAnon(page))
- return try_to_unmap_anon(page, 1, 0);
+ return try_to_unmap_anon(page, TTU_MUNLOCK);
else
- return try_to_unmap_file(page, 1, 0);
+ return try_to_unmap_file(page, TTU_MUNLOCK);
}
diff --git a/mm/shmem.c b/mm/shmem.c
index b206a7a32e2..98631c26c20 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1633,8 +1633,8 @@ shmem_write_end(struct file *file, struct address_space *mapping,
if (pos + copied > inode->i_size)
i_size_write(inode, pos + copied);
- unlock_page(page);
set_page_dirty(page);
+ unlock_page(page);
page_cache_release(page);
return copied;
@@ -1971,13 +1971,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
iput(inode);
return error;
}
- unlock_page(page);
inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_symlink_inode_operations;
kaddr = kmap_atomic(page, KM_USER0);
memcpy(kaddr, symname, len);
kunmap_atomic(kaddr, KM_USER0);
set_page_dirty(page);
+ unlock_page(page);
page_cache_release(page);
}
if (dir->i_mode & S_ISGID)
@@ -2420,6 +2420,7 @@ static const struct address_space_operations shmem_aops = {
.write_end = shmem_write_end,
#endif
.migratepage = migrate_page,
+ .error_remove_page = generic_error_remove_page,
};
static const struct file_operations shmem_file_operations = {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f1bf19daadc..4de7f02f820 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -699,7 +699,7 @@ int free_swap_and_cache(swp_entry_t entry)
struct swap_info_struct *p;
struct page *page = NULL;
- if (is_migration_entry(entry))
+ if (non_swap_entry(entry))
return 1;
p = swap_info_get(entry);
@@ -2085,7 +2085,7 @@ static int __swap_duplicate(swp_entry_t entry, bool cache)
int count;
bool has_cache;
- if (is_migration_entry(entry))
+ if (non_swap_entry(entry))
return -EINVAL;
type = swp_type(entry);
diff --git a/mm/truncate.c b/mm/truncate.c
index ccc3ecf7cb9..450cebdabfc 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -93,11 +93,11 @@ EXPORT_SYMBOL(cancel_dirty_page);
* its lock, b) when a concurrent invalidate_mapping_pages got there first and
* c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
*/
-static void
+static int
truncate_complete_page(struct address_space *mapping, struct page *page)
{
if (page->mapping != mapping)
- return;
+ return -EIO;
if (page_has_private(page))
do_invalidatepage(page, 0);
@@ -108,6 +108,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
remove_from_page_cache(page);
ClearPageMappedToDisk(page);
page_cache_release(page); /* pagecache ref */
+ return 0;
}
/*
@@ -135,6 +136,51 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
return ret;
}
+int truncate_inode_page(struct address_space *mapping, struct page *page)
+{
+ if (page_mapped(page)) {
+ unmap_mapping_range(mapping,
+ (loff_t)page->index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, 0);
+ }
+ return truncate_complete_page(mapping, page);
+}
+
+/*
+ * Used to get rid of pages on hardware memory corruption.
+ */
+int generic_error_remove_page(struct address_space *mapping, struct page *page)
+{
+ if (!mapping)
+ return -EINVAL;
+ /*
+ * Only punch for normal data pages for now.
+ * Handling other types like directories would need more auditing.
+ */
+ if (!S_ISREG(mapping->host->i_mode))
+ return -EIO;
+ return truncate_inode_page(mapping, page);
+}
+EXPORT_SYMBOL(generic_error_remove_page);
+
+/*
+ * Safely invalidate one page from its pagecache mapping.
+ * It only drops clean, unused pages. The page must be locked.
+ *
+ * Returns 1 if the page is successfully invalidated, otherwise 0.
+ */
+int invalidate_inode_page(struct page *page)
+{
+ struct address_space *mapping = page_mapping(page);
+ if (!mapping)
+ return 0;
+ if (PageDirty(page) || PageWriteback(page))
+ return 0;
+ if (page_mapped(page))
+ return 0;
+ return invalidate_complete_page(mapping, page);
+}
+
/**
* truncate_inode_pages - truncate range of pages specified by start & end byte offsets
* @mapping: mapping to truncate
@@ -196,12 +242,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
unlock_page(page);
continue;
}
- if (page_mapped(page)) {
- unmap_mapping_range(mapping,
- (loff_t)page_index<<PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE, 0);
- }
- truncate_complete_page(mapping, page);
+ truncate_inode_page(mapping, page);
unlock_page(page);
}
pagevec_release(&pvec);
@@ -238,15 +279,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
break;
lock_page(page);
wait_on_page_writeback(page);
- if (page_mapped(page)) {
- unmap_mapping_range(mapping,
- (loff_t)page->index<<PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE, 0);
- }
+ truncate_inode_page(mapping, page);
if (page->index > next)
next = page->index;
next++;
- truncate_complete_page(mapping, page);
unlock_page(page);
}
pagevec_release(&pvec);
@@ -311,12 +347,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
if (lock_failed)
continue;
- if (PageDirty(page) || PageWriteback(page))
- goto unlock;
- if (page_mapped(page))
- goto unlock;
- ret += invalidate_complete_page(mapping, page);
-unlock:
+ ret += invalidate_inode_page(page);
+
unlock_page(page);
if (next > end)
break;
@@ -465,3 +497,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
+
+/**
+ * truncate_pagecache - unmap and remove pagecache that has been truncated
+ * @inode: inode
+ * @old: old file offset
+ * @new: new file offset
+ *
+ * inode's new i_size must already be written before truncate_pagecache
+ * is called.
+ *
+ * This function should typically be called before the filesystem
+ * releases resources associated with the freed range (eg. deallocates
+ * blocks). This way, pagecache will always stay logically coherent
+ * with on-disk format, and the filesystem would not have to deal with
+ * situations such as writepage being called for a page that has already
+ * had its underlying blocks deallocated.
+ */
+void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
+{
+ if (new < old) {
+ struct address_space *mapping = inode->i_mapping;
+
+ /*
+ * unmap_mapping_range is called twice, first simply for
+ * efficiency so that truncate_inode_pages does fewer
+ * single-page unmaps. However after this first call, and
+ * before truncate_inode_pages finishes, it is possible for
+ * private pages to be COWed, which remain after
+ * truncate_inode_pages finishes, hence the second
+ * unmap_mapping_range call must be made for correctness.
+ */
+ unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+ truncate_inode_pages(mapping, new);
+ unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+ }
+}
+EXPORT_SYMBOL(truncate_pagecache);
+
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: file offset to start truncating
+ *
+ * NOTE! We have to be ready to update the memory sharing
+ * between the file and the memory map for a potential last
+ * incomplete page. Ugly, but necessary.
+ */
+int vmtruncate(struct inode *inode, loff_t offset)
+{
+ loff_t oldsize;
+ int error;
+
+ error = inode_newsize_ok(inode, offset);
+ if (error)
+ return error;
+ oldsize = inode->i_size;
+ i_size_write(inode, offset);
+ truncate_pagecache(inode, oldsize, offset);
+ if (inode->i_op->truncate)
+ inode->i_op->truncate(inode);
+
+ return error;
+}
+EXPORT_SYMBOL(vmtruncate);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 613e89f471d..1219ceb8a9b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -663,7 +663,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* processes. Try to unmap it here.
*/
if (page_mapped(page) && mapping) {
- switch (try_to_unmap(page, 0)) {
+ switch (try_to_unmap(page, TTU_UNMAP)) {
case SWAP_FAIL:
goto activate_locked;
case SWAP_AGAIN:
@@ -1836,11 +1836,45 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+ gfp_t gfp_mask, bool noswap,
+ unsigned int swappiness,
+ struct zone *zone, int nid)
+{
+ struct scan_control sc = {
+ .may_writepage = !laptop_mode,
+ .may_unmap = 1,
+ .may_swap = !noswap,
+ .swap_cluster_max = SWAP_CLUSTER_MAX,
+ .swappiness = swappiness,
+ .order = 0,
+ .mem_cgroup = mem,
+ .isolate_pages = mem_cgroup_isolate_pages,
+ };
+ nodemask_t nm = nodemask_of_node(nid);
+
+ sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+ (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
+ sc.nodemask = &nm;
+ sc.nr_reclaimed = 0;
+ sc.nr_scanned = 0;
+ /*
+ * NOTE: Although we can get the priority field, using it
+ * here is not a good idea, since it limits the pages we can scan.
+ * If we don't reclaim here, the shrink_zone from balance_pgdat
+ * will pick up pages from other mem cgroups as well. We hack
+ * the priority and make it zero.
+ */
+ shrink_zone(0, zone, &sc);
+ return sc.nr_reclaimed;
+}
+
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
gfp_t gfp_mask,
bool noswap,
unsigned int swappiness)
{
+ struct zonelist *zonelist;
struct scan_control sc = {
.may_writepage = !laptop_mode,
.may_unmap = 1,
@@ -1852,7 +1886,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
.isolate_pages = mem_cgroup_isolate_pages,
.nodemask = NULL, /* we don't care the placement */
};
- struct zonelist *zonelist;
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -1974,6 +2007,7 @@ loop_again:
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
int nr_slab;
+ int nid, zid;
if (!populated_zone(zone))
continue;
@@ -1988,6 +2022,15 @@ loop_again:
temp_priority[i] = priority;
sc.nr_scanned = 0;
note_zone_scanning_priority(zone, priority);
+
+ nid = pgdat->node_id;
+ zid = zone_idx(zone);
+ /*
+ * Call soft limit reclaim before calling shrink_zone.
+ * For now we ignore the return value
+ */
+ mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
+ nid, zid);
/*
* We put equal pressure on every zone, unless one
* zone has way too many pages free already.
@@ -2801,10 +2844,10 @@ static void scan_all_zones_unevictable_pages(void)
unsigned long scan_unevictable_pages;
int scan_unevictable_handler(struct ctl_table *table, int write,
- struct file *file, void __user *buffer,
+ void __user *buffer,
size_t *length, loff_t *ppos)
{
- proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+ proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (write && *(unsigned long *)table->data)
scan_all_zones_unevictable_pages();
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index da0f64f82b5..d6b1b054e29 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1781,8 +1781,8 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
ax25_info.idletimer = ax25_display_timer(&ax25->idletimer) / (60 * HZ);
ax25_info.n2count = ax25->n2count;
ax25_info.state = ax25->state;
- ax25_info.rcv_q = sk_wmem_alloc_get(sk);
- ax25_info.snd_q = sk_rmem_alloc_get(sk);
+ ax25_info.rcv_q = sk_rmem_alloc_get(sk);
+ ax25_info.snd_q = sk_wmem_alloc_get(sk);
ax25_info.vs = ax25->vs;
ax25_info.vr = ax25->vr;
ax25_info.va = ax25->va;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 907a82e9023..a16a2342f6b 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -965,12 +965,12 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
#ifdef CONFIG_SYSCTL
static
-int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp,
+int brnf_sysctl_call_tables(ctl_table * ctl, int write,
void __user * buffer, size_t * lenp, loff_t * ppos)
{
int ret;
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write && *(int *)(ctl->data))
*(int *)(ctl->data) = 1;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0bcecbf0658..4d11c28ca8c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -192,11 +192,10 @@
#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
/* Thread control flag bits */
-#define T_TERMINATE (1<<0)
-#define T_STOP (1<<1) /* Stop run */
-#define T_RUN (1<<2) /* Start run */
-#define T_REMDEVALL (1<<3) /* Remove all devs */
-#define T_REMDEV (1<<4) /* Remove one dev */
+#define T_STOP (1<<0) /* Stop run */
+#define T_RUN (1<<1) /* Start run */
+#define T_REMDEVALL (1<<2) /* Remove all devs */
+#define T_REMDEV (1<<3) /* Remove one dev */
/* If lock -- can be removed after some work */
#define if_lock(t) spin_lock(&(t->if_lock));
@@ -2105,7 +2104,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
{
- ktime_t start;
+ ktime_t start_time, end_time;
s32 remaining;
struct hrtimer_sleeper t;
@@ -2116,7 +2115,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
if (remaining <= 0)
return;
- start = ktime_now();
+ start_time = ktime_now();
if (remaining < 100)
udelay(remaining); /* really small just spin */
else {
@@ -2135,7 +2134,10 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
} while (t.task && pkt_dev->running && !signal_pending(current));
__set_current_state(TASK_RUNNING);
}
- pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), start));
+ end_time = ktime_now();
+
+ pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
+ pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay);
}
static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
@@ -3365,19 +3367,29 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
mutex_unlock(&pktgen_thread_lock);
}
-static void idle(struct pktgen_dev *pkt_dev)
+static void pktgen_resched(struct pktgen_dev *pkt_dev)
{
ktime_t idle_start = ktime_now();
+ schedule();
+ pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+}
- if (need_resched())
- schedule();
- else
- cpu_relax();
+static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
+{
+ ktime_t idle_start = ktime_now();
+ while (atomic_read(&(pkt_dev->skb->users)) != 1) {
+ if (signal_pending(current))
+ break;
+
+ if (need_resched())
+ pktgen_resched(pkt_dev);
+ else
+ cpu_relax();
+ }
pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
}
-
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
struct net_device *odev = pkt_dev->odev;
@@ -3387,36 +3399,21 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
u16 queue_map;
int ret;
- if (pkt_dev->delay) {
- spin(pkt_dev, pkt_dev->next_tx);
-
- /* This is max DELAY, this has special meaning of
- * "never transmit"
- */
- if (pkt_dev->delay == ULLONG_MAX) {
- pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX);
- return;
- }
- }
-
- if (!pkt_dev->skb) {
- set_cur_queue_map(pkt_dev);
- queue_map = pkt_dev->cur_queue_map;
- } else {
- queue_map = skb_get_queue_mapping(pkt_dev->skb);
+ /* If device is offline, then don't send */
+ if (unlikely(!netif_running(odev) || !netif_carrier_ok(odev))) {
+ pktgen_stop_device(pkt_dev);
+ return;
}
- txq = netdev_get_tx_queue(odev, queue_map);
- /* Did we saturate the queue already? */
- if (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq)) {
- /* If device is down, then all queues are permnantly frozen */
- if (netif_running(odev))
- idle(pkt_dev);
- else
- pktgen_stop_device(pkt_dev);
+ /* This is max DELAY, this has special meaning of
+ * "never transmit"
+ */
+ if (unlikely(pkt_dev->delay == ULLONG_MAX)) {
+ pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX);
return;
}
+ /* If no skb or clone count exhausted then get new one */
if (!pkt_dev->skb || (pkt_dev->last_ok &&
++pkt_dev->clone_count >= pkt_dev->clone_skb)) {
/* build a new pkt */
@@ -3435,54 +3432,45 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->clone_count = 0; /* reset counter */
}
- /* fill_packet() might have changed the queue */
+ if (pkt_dev->delay && pkt_dev->last_ok)
+ spin(pkt_dev, pkt_dev->next_tx);
+
queue_map = skb_get_queue_mapping(pkt_dev->skb);
txq = netdev_get_tx_queue(odev, queue_map);
__netif_tx_lock_bh(txq);
- if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq)))
- pkt_dev->last_ok = 0;
- else {
- atomic_inc(&(pkt_dev->skb->users));
+ atomic_inc(&(pkt_dev->skb->users));
- retry_now:
+ if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq)))
+ ret = NETDEV_TX_BUSY;
+ else
ret = (*xmit)(pkt_dev->skb, odev);
- switch (ret) {
- case NETDEV_TX_OK:
- txq_trans_update(txq);
- pkt_dev->last_ok = 1;
- pkt_dev->sofar++;
- pkt_dev->seq_num++;
- pkt_dev->tx_bytes += pkt_dev->cur_pkt_size;
- break;
- case NETDEV_TX_LOCKED:
- cpu_relax();
- goto retry_now;
- default: /* Drivers are not supposed to return other values! */
- if (net_ratelimit())
- pr_info("pktgen: %s xmit error: %d\n",
- odev->name, ret);
- pkt_dev->errors++;
- /* fallthru */
- case NETDEV_TX_BUSY:
- /* Retry it next time */
- atomic_dec(&(pkt_dev->skb->users));
- pkt_dev->last_ok = 0;
- }
-
- if (pkt_dev->delay)
- pkt_dev->next_tx = ktime_add_ns(ktime_now(),
- pkt_dev->delay);
+
+ switch (ret) {
+ case NETDEV_TX_OK:
+ txq_trans_update(txq);
+ pkt_dev->last_ok = 1;
+ pkt_dev->sofar++;
+ pkt_dev->seq_num++;
+ pkt_dev->tx_bytes += pkt_dev->cur_pkt_size;
+ break;
+ default: /* Drivers are not supposed to return other values! */
+ if (net_ratelimit())
+ pr_info("pktgen: %s xmit error: %d\n",
+ odev->name, ret);
+ pkt_dev->errors++;
+ /* fallthru */
+ case NETDEV_TX_LOCKED:
+ case NETDEV_TX_BUSY:
+ /* Retry it next time */
+ atomic_dec(&(pkt_dev->skb->users));
+ pkt_dev->last_ok = 0;
}
__netif_tx_unlock_bh(txq);
/* If pkt_dev->count is zero, then run forever */
if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
- while (atomic_read(&(pkt_dev->skb->users)) != 1) {
- if (signal_pending(current))
- break;
- idle(pkt_dev);
- }
+ pktgen_wait_for_skb(pkt_dev);
/* Done with this */
pktgen_stop_device(pkt_dev);
@@ -3515,20 +3503,24 @@ static int pktgen_thread_worker(void *arg)
while (!kthread_should_stop()) {
pkt_dev = next_to_run(t);
- if (!pkt_dev &&
- (t->control & (T_STOP | T_RUN | T_REMDEVALL | T_REMDEV))
- == 0) {
- prepare_to_wait(&(t->queue), &wait,
- TASK_INTERRUPTIBLE);
- schedule_timeout(HZ / 10);
- finish_wait(&(t->queue), &wait);
+ if (unlikely(!pkt_dev && t->control == 0)) {
+ wait_event_interruptible_timeout(t->queue,
+ t->control != 0,
+ HZ/10);
+ continue;
}
__set_current_state(TASK_RUNNING);
- if (pkt_dev)
+ if (likely(pkt_dev)) {
pktgen_xmit(pkt_dev);
+ if (need_resched())
+ pktgen_resched(pkt_dev);
+ else
+ cpu_relax();
+ }
+
if (t->control & T_STOP) {
pktgen_stop(t);
t->control &= ~(T_STOP);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 1c6a5bb6f0c..6e1f085db06 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -164,7 +164,7 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU
static int min_priority[1];
static int max_priority[] = { 127 }; /* From DECnet spec */
-static int dn_forwarding_proc(ctl_table *, int, struct file *,
+static int dn_forwarding_proc(ctl_table *, int,
void __user *, size_t *, loff_t *);
static int dn_forwarding_sysctl(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
@@ -274,7 +274,6 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
}
static int dn_forwarding_proc(ctl_table *table, int write,
- struct file *filep,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -290,7 +289,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
dn_db = dev->dn_ptr;
old = dn_db->parms.forwarding;
- err = proc_dointvec(table, write, filep, buffer, lenp, ppos);
+ err = proc_dointvec(table, write, buffer, lenp, ppos);
if ((err >= 0) && write) {
if (dn_db->parms.forwarding < 0)
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 5bcd592ae6d..26b0ab1e9f5 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -165,7 +165,6 @@ static int dn_node_address_strategy(ctl_table *table,
}
static int dn_node_address_handler(ctl_table *table, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -276,7 +275,6 @@ static int dn_def_dev_strategy(ctl_table *table,
static int dn_def_dev_handler(ctl_table *table, int write,
- struct file * filp,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 07336c6201f..e92f1fd28aa 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1270,10 +1270,10 @@ static void inet_forward_change(struct net *net)
}
static int devinet_conf_proc(ctl_table *ctl, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write) {
struct ipv4_devconf *cnf = ctl->extra1;
@@ -1342,12 +1342,12 @@ static int devinet_conf_sysctl(ctl_table *table,
}
static int devinet_sysctl_forward(ctl_table *ctl, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write && *valp != val) {
struct net *net = ctl->extra2;
@@ -1372,12 +1372,12 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
}
int ipv4_doint_and_flush(ctl_table *ctl, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
struct net *net = ctl->extra2;
if (write && *valp != val)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d9645c94a06..41ada9904d3 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -66,10 +66,7 @@
solution, but it supposes maintaing new variable in ALL
skb, even if no tunneling is used.
- Current solution: t->recursion lock breaks dead loops. It looks
- like dev->tbusy flag, but I preferred new variable, because
- the semantics is different. One day, when hard_start_xmit
- will be multithreaded we will have to use skb->encapsulation.
+ Current solution: HARD_TX_LOCK lock breaks dead loops.
@@ -678,11 +675,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
__be32 dst;
int mtu;
- if (tunnel->recursion++) {
- stats->collisions++;
- goto tx_error;
- }
-
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -820,7 +812,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
ip_rt_put(rt);
stats->tx_dropped++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
if (skb->sk)
@@ -888,7 +879,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
nf_reset(skb);
IPTUNNEL_XMIT();
- tunnel->recursion--;
return NETDEV_TX_OK;
tx_error_icmp:
@@ -897,7 +887,6 @@ tx_error_icmp:
tx_error:
stats->tx_errors++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index fc7993e9061..5a0693576e8 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -611,6 +611,9 @@ static int do_ip_setsockopt(struct sock *sk, int level,
* Check the arguments are allowable
*/
+ if (optlen < sizeof(struct in_addr))
+ goto e_inval;
+
err = -EFAULT;
if (optlen >= sizeof(struct ip_mreqn)) {
if (copy_from_user(&mreq, optval, sizeof(mreq)))
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 62548cb0923..08ccd344de7 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -402,11 +402,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
__be32 dst = tiph->daddr;
int mtu;
- if (tunnel->recursion++) {
- stats->collisions++;
- goto tx_error;
- }
-
if (skb->protocol != htons(ETH_P_IP))
goto tx_error;
@@ -485,7 +480,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
ip_rt_put(rt);
stats->tx_dropped++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
if (skb->sk)
@@ -523,7 +517,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
nf_reset(skb);
IPTUNNEL_XMIT();
- tunnel->recursion--;
return NETDEV_TX_OK;
tx_error_icmp:
@@ -531,7 +524,6 @@ tx_error_icmp:
tx_error:
stats->tx_errors++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df934731453..bb419925202 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3036,7 +3036,7 @@ void ip_rt_multicast_event(struct in_device *in_dev)
#ifdef CONFIG_SYSCTL
static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
if (write) {
@@ -3046,7 +3046,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
memcpy(&ctl, __ctl, sizeof(ctl));
ctl.data = &flush_delay;
- proc_dointvec(&ctl, write, filp, buffer, lenp, ppos);
+ proc_dointvec(&ctl, write, buffer, lenp, ppos);
net = (struct net *)__ctl->extra1;
rt_cache_flush(net, flush_delay);
@@ -3106,12 +3106,11 @@ static void rt_secret_reschedule(int old)
}
static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
- struct file *filp,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int old = ip_rt_secret_interval;
- int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
rt_secret_reschedule(old);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4710d219f06..2dcf04d9b00 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -36,7 +36,7 @@ static void set_local_port_range(int range[2])
}
/* Validate changes from /proc interface. */
-static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
+static int ipv4_local_port_range(ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -51,7 +51,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
};
inet_get_local_port_range(range, range + 1);
- ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
if (range[1] < range[0])
@@ -91,7 +91,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table,
}
-static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp,
+static int proc_tcp_congestion_control(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char val[TCP_CA_NAME_MAX];
@@ -103,7 +103,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
tcp_get_default_congestion_control(val);
- ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0)
ret = tcp_set_default_congestion_control(val);
return ret;
@@ -129,7 +129,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table,
}
static int proc_tcp_available_congestion_control(ctl_table *ctl,
- int write, struct file * filp,
+ int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -140,13 +140,13 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl,
if (!tbl.data)
return -ENOMEM;
tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
- ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
kfree(tbl.data);
return ret;
}
static int proc_allowed_congestion_control(ctl_table *ctl,
- int write, struct file * filp,
+ int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -158,7 +158,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
return -ENOMEM;
tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
- ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0)
ret = tcp_set_allowed_congestion_control(tbl.data);
kfree(tbl.data);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 55f486d89c8..1fd0a3d775d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3986,14 +3986,14 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
#ifdef CONFIG_SYSCTL
static
-int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+int addrconf_sysctl_forward(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
int ret;
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write)
ret = addrconf_fixup_forwarding(ctl, valp, val);
@@ -4090,14 +4090,14 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
}
static
-int addrconf_sysctl_disable(ctl_table *ctl, int write, struct file * filp,
+int addrconf_sysctl_disable(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
int ret;
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write)
ret = addrconf_disable_ipv6(ctl, valp, val);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 7d25bbe3211..c595bbe1ed9 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1043,11 +1043,6 @@ ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
struct net_device_stats *stats = &t->dev->stats;
int ret;
- if (t->recursion++) {
- stats->collisions++;
- goto tx_err;
- }
-
switch (skb->protocol) {
case htons(ETH_P_IP):
ret = ip4ip6_tnl_xmit(skb, dev);
@@ -1062,14 +1057,12 @@ ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (ret < 0)
goto tx_err;
- t->recursion--;
return NETDEV_TX_OK;
tx_err:
stats->tx_errors++;
stats->tx_dropped++;
kfree_skb(skb);
- t->recursion--;
return NETDEV_TX_OK;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 7015478797f..498b9b0b0fa 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1735,7 +1735,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
}
}
-int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct net_device *dev = ctl->extra1;
struct inet6_dev *idev;
@@ -1746,16 +1746,16 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
if (strcmp(ctl->procname, "retrans_time") == 0)
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
else if (strcmp(ctl->procname, "base_reachable_time") == 0)
ret = proc_dointvec_jiffies(ctl, write,
- filp, buffer, lenp, ppos);
+ buffer, lenp, ppos);
else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
(strcmp(ctl->procname, "base_reachable_time_ms") == 0))
ret = proc_dointvec_ms_jiffies(ctl, write,
- filp, buffer, lenp, ppos);
+ buffer, lenp, ppos);
else
ret = -1;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 77aecbe8ff6..d6fe7646a8f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2524,13 +2524,13 @@ static const struct file_operations rt6_stats_seq_fops = {
#ifdef CONFIG_SYSCTL
static
-int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
+int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
int delay = net->ipv6.sysctl.flush_delay;
if (write) {
- proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ proc_dointvec(ctl, write, buffer, lenp, ppos);
fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
return 0;
} else
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0ae4f644818..fcb53962884 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -626,11 +626,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
struct in6_addr *addr6;
int addr_type;
- if (tunnel->recursion++) {
- stats->collisions++;
- goto tx_error;
- }
-
if (skb->protocol != htons(ETH_P_IPV6))
goto tx_error;
@@ -753,7 +748,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
ip_rt_put(rt);
stats->tx_dropped++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
if (skb->sk)
@@ -794,7 +788,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
nf_reset(skb);
IPTUNNEL_XMIT();
- tunnel->recursion--;
return NETDEV_TX_OK;
tx_error_icmp:
@@ -802,7 +795,6 @@ tx_error_icmp:
tx_error:
stats->tx_errors++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
index 57f8817c397..5c86567e5a7 100644
--- a/net/irda/irsysctl.c
+++ b/net/irda/irsysctl.c
@@ -73,12 +73,12 @@ static int min_lap_keepalive_time = 100; /* 100us */
/* For other sysctl, I've no idea of the range. Maybe Dag could help
* us on that - Jean II */
-static int do_devname(ctl_table *table, int write, struct file *filp,
+static int do_devname(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
- ret = proc_dostring(table, write, filp, buffer, lenp, ppos);
+ ret = proc_dostring(table, write, buffer, lenp, ppos);
if (ret == 0 && write) {
struct ias_value *val;
@@ -90,12 +90,12 @@ static int do_devname(ctl_table *table, int write, struct file *filp,
}
-static int do_discovery(ctl_table *table, int write, struct file *filp,
+static int do_discovery(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
- ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret)
return ret;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 039901109fa..71e10cabf81 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -90,8 +90,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
bss->dtim_period = tim_ie->dtim_period;
}
- /* set default value for buggy APs */
- if (!elems->tim || bss->dtim_period == 0)
+ /* set default value for buggy AP/no TIM element */
+ if (bss->dtim_period == 0)
bss->dtim_period = 1;
bss->supp_rates_len = 0;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index fba2892b99e..446e9bd4b4b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1496,14 +1496,14 @@ static int ip_vs_zero_all(void)
static int
-proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
+proc_do_defense_mode(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = table->data;
int val = *valp;
int rc;
- rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ rc = proc_dointvec(table, write, buffer, lenp, ppos);
if (write && (*valp != val)) {
if ((*valp < 0) || (*valp > 3)) {
/* Restore the correct value */
@@ -1517,7 +1517,7 @@ proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
static int
-proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
+proc_do_sync_threshold(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = table->data;
@@ -1527,7 +1527,7 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
/* backup the value first */
memcpy(val, valp, sizeof(val));
- rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ rc = proc_dointvec(table, write, buffer, lenp, ppos);
if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
/* Restore the correct value */
memcpy(valp, val, sizeof(val));
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 4e620305f28..c93494fef8e 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -226,7 +226,7 @@ static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
static struct ctl_table_header *nf_log_dir_header;
-static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
+static int nf_log_proc_dostring(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
const struct nf_logger *logger;
@@ -260,7 +260,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
table->data = "NONE";
else
table->data = logger->name;
- r = proc_dostring(table, write, filp, buffer, lenp, ppos);
+ r = proc_dostring(table, write, buffer, lenp, ppos);
mutex_unlock(&nf_log_mutex);
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 55180b99562..a4bafbf1509 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1609,6 +1609,16 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups)
return err;
}
+void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
+{
+ struct sock *sk;
+ struct hlist_node *node;
+ struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
+
+ sk_for_each_bound(sk, node, &tbl->mc_list)
+ netlink_update_socket_mc(nlk_sk(sk), group, 0);
+}
+
/**
* netlink_clear_multicast_users - kick off multicast listeners
*
@@ -1619,15 +1629,8 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups)
*/
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
- struct sock *sk;
- struct hlist_node *node;
- struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
-
netlink_table_grab();
-
- sk_for_each_bound(sk, node, &tbl->mc_list)
- netlink_update_socket_mc(nlk_sk(sk), group, 0);
-
+ __netlink_clear_multicast_users(ksk, group);
netlink_table_ungrab();
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 566941e0336..44ff3f3810f 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -220,10 +220,12 @@ static void __genl_unregister_mc_group(struct genl_family *family,
struct net *net;
BUG_ON(grp->family != family);
+ netlink_table_grab();
rcu_read_lock();
for_each_net_rcu(net)
- netlink_clear_multicast_users(net->genl_sock, grp->id);
+ __netlink_clear_multicast_users(net->genl_sock, grp->id);
rcu_read_unlock();
+ netlink_table_ungrab();
clear_bit(grp->id, mc_groups);
list_del(&grp->list);
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index a662e62a99c..f60c0c2aacb 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -168,6 +168,12 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
goto drop;
}
+ /* Broadcast sending is not implemented */
+ if (pn_addr(dst) == PNADDR_BROADCAST) {
+ err = -EOPNOTSUPP;
+ goto drop;
+ }
+
skb_reset_transport_header(skb);
WARN_ON(skb_headroom(skb) & 1); /* HW assumes word alignment */
skb_push(skb, sizeof(struct phonethdr));
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 7a4ee397d2f..07aa9f08d5f 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -113,6 +113,8 @@ void pn_sock_unhash(struct sock *sk)
}
EXPORT_SYMBOL(pn_sock_unhash);
+static DEFINE_MUTEX(port_mutex);
+
static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
{
struct sock *sk = sock->sk;
@@ -140,9 +142,11 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
err = -EINVAL; /* attempt to rebind */
goto out;
}
+ WARN_ON(sk_hashed(sk));
+ mutex_lock(&port_mutex);
err = sk->sk_prot->get_port(sk, pn_port(handle));
if (err)
- goto out;
+ goto out_port;
/* get_port() sets the port, bind() sets the address if applicable */
pn->sobject = pn_object(saddr, pn_port(pn->sobject));
@@ -150,6 +154,8 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
/* Enable RX on the socket */
sk->sk_prot->hash(sk);
+out_port:
+ mutex_unlock(&port_mutex);
out:
release_sock(sk);
return err;
@@ -357,8 +363,6 @@ const struct proto_ops phonet_stream_ops = {
};
EXPORT_SYMBOL(phonet_stream_ops);
-static DEFINE_MUTEX(port_mutex);
-
/* allocate port for a socket */
int pn_sock_get_port(struct sock *sk, unsigned short sport)
{
@@ -370,9 +374,7 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport)
memset(&try_sa, 0, sizeof(struct sockaddr_pn));
try_sa.spn_family = AF_PHONET;
-
- mutex_lock(&port_mutex);
-
+ WARN_ON(!mutex_is_locked(&port_mutex));
if (!sport) {
/* search free port */
int port, pmin, pmax;
@@ -401,8 +403,6 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport)
else
sock_put(tmpsk);
}
- mutex_unlock(&port_mutex);
-
/* the port must be in use already */
return -EADDRINUSE;
diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c
index 7b5749ee276..2220f332232 100644
--- a/net/phonet/sysctl.c
+++ b/net/phonet/sysctl.c
@@ -56,7 +56,7 @@ void phonet_get_local_port_range(int *min, int *max)
} while (read_seqretry(&local_port_range_lock, seq));
}
-static int proc_local_port_range(ctl_table *table, int write, struct file *filp,
+static int proc_local_port_range(ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -70,7 +70,7 @@ static int proc_local_port_range(ctl_table *table, int write, struct file *filp,
.extra2 = &local_port_range_max,
};
- ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
if (range[1] < range[0])
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index c70dd7f5258..1db618f56ec 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -8,7 +8,6 @@
#include <linux/types.h>
#include <linux/module.h>
-#include <linux/utsname.h>
#include <linux/sunrpc/clnt.h>
#ifdef RPC_DEBUG
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a417d5ab5dd..38829e20500 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -640,10 +640,11 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
/**
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
- * @ops: RPC call ops
+ * @req: RPC request
+ * @tk_ops: RPC call ops
*/
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
- const struct rpc_call_ops *tk_ops)
+ const struct rpc_call_ops *tk_ops)
{
struct rpc_task *task;
struct xdr_buf *xbufp = &req->rq_snd_buf;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 858a443f418..49278f83036 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -860,7 +860,8 @@ static void rpc_clntdir_depopulate(struct dentry *dentry)
/**
* rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs
- * @path: path from the rpc_pipefs root to the new directory
+ * @dentry: dentry from the rpc_pipefs root to the new directory
+ * @name: &struct qstr for the name
* @rpc_client: rpc client to associate with this directory
*
* This creates a directory at the given @path associated with
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 5231f7aaac0..42f9748ae09 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -56,7 +56,7 @@ rpc_unregister_sysctl(void)
}
}
-static int proc_do_xprt(ctl_table *table, int write, struct file *file,
+static int proc_do_xprt(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[256];
@@ -71,7 +71,7 @@ static int proc_do_xprt(ctl_table *table, int write, struct file *file,
}
static int
-proc_dodebug(ctl_table *table, int write, struct file *file,
+proc_dodebug(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[20], c, *s;
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 87101177825..35fb68b9c8e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -80,7 +80,7 @@ struct kmem_cache *svc_rdma_ctxt_cachep;
* current value.
*/
static int read_reset_stat(ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
atomic_t *stat = (atomic_t *)table->data;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index bee41546575..37c5475ba25 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -773,6 +773,7 @@ static void xs_close(struct rpc_xprt *xprt)
dprintk("RPC: xs_close xprt %p\n", xprt);
xs_reset_transport(transport);
+ xprt->reestablish_timeout = 0;
smp_mb__before_clear_bit();
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
@@ -1264,6 +1265,12 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes)
if (xprt->shutdown)
goto out;
+ /* Any data means we had a useful conversation, so
+ * then we don't need to delay the next reconnect
+ */
+ if (xprt->reestablish_timeout)
+ xprt->reestablish_timeout = 0;
+
/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
rd_desc.arg.data = xprt;
do {
@@ -2034,6 +2041,8 @@ static void xs_connect(struct rpc_task *task)
&transport->connect_worker,
xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1;
+ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+ xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
} else {
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index d16cd9ea4d0..bf725275eb8 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -26,11 +26,11 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
wdev->wext.connect.ie = wdev->wext.ie;
wdev->wext.connect.ie_len = wdev->wext.ie_len;
- wdev->wext.connect.privacy = wdev->wext.default_key != -1;
if (wdev->wext.keys) {
wdev->wext.keys->def = wdev->wext.default_key;
wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key;
+ wdev->wext.connect.privacy = true;
}
if (!wdev->wext.connect.ssid_len)
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index c29be8f9024..4f9c1908593 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -83,11 +83,12 @@ TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/)
# is automatically cleaned up.
try-run = $(shell set -e; \
TMP="$(TMPOUT).$$$$.tmp"; \
+ TMPO="$(TMPOUT).$$$$.o"; \
if ($(1)) >/dev/null 2>&1; \
then echo "$(2)"; \
else echo "$(3)"; \
fi; \
- rm -f "$$TMP")
+ rm -f "$$TMP" "$$TMPO")
# as-option
# Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
@@ -105,12 +106,12 @@ as-instr = $(call try-run,\
# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
cc-option = $(call try-run,\
- $(CC) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2))
+ $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2))
# cc-option-yn
# Usage: flag := $(call cc-option-yn,-march=winchip-c6)
cc-option-yn = $(call try-run,\
- $(CC) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n)
+ $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n)
# cc-option-align
# Prefix align with either -falign or -malign
@@ -130,10 +131,15 @@ cc-fullversion = $(shell $(CONFIG_SHELL) \
# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
cc-ifversion = $(shell [ $(call cc-version, $(CC)) $(1) $(2) ] && echo $(3))
+# cc-ldoption
+# Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+cc-ldoption = $(call try-run,\
+ $(CC) $(1) -nostdlib -xc /dev/null -o "$$TMP",$(1),$(2))
+
# ld-option
-# Usage: ldflags += $(call ld-option, -Wl$(comma)--hash-style=both)
+# Usage: LDFLAGS += $(call ld-option, -X)
ld-option = $(call try-run,\
- $(CC) $(1) -nostdlib -xc /dev/null -o "$$TMP",$(1),$(2))
+ $(CC) /dev/null -c -o "$$TMPO" ; $(LD) $(1) "$$TMPO" -o "$$TMP",$(1),$(2))
######
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 5c4b7a400c1..341b58902ff 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -206,7 +206,7 @@ cmd_modversions = \
endif
ifdef CONFIG_FTRACE_MCOUNT_RECORD
-cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
+cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
"$(if $(CONFIG_64BIT),64,32)" \
"$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
"$(if $(part-of-module),1,0)" "$(@)";
@@ -216,6 +216,7 @@ define rule_cc_o_c
$(call echo-cmd,checksrc) $(cmd_checksrc) \
$(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \
$(cmd_modversions) \
+ $(call echo-cmd,record_mcount) \
$(cmd_record_mcount) \
scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \
$(dot-target).tmp; \
@@ -269,7 +270,8 @@ targets += $(extra-y) $(MAKECMDGOALS) $(always)
# Linker scripts preprocessor (.lds.S -> .lds)
# ---------------------------------------------------------------------------
quiet_cmd_cpp_lds_S = LDS $@
- cmd_cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ -o $@ $<
+ cmd_cpp_lds_S = $(CPP) $(cpp_flags) -P -C -U$(ARCH) \
+ -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $<
$(obj)/%.lds: $(src)/%.lds.S FORCE
$(call if_changed_dep,cpp_lds_S)
diff --git a/scripts/basic/docproc.c b/scripts/basic/docproc.c
index 99ca7a69868..79ab973fb43 100644
--- a/scripts/basic/docproc.c
+++ b/scripts/basic/docproc.c
@@ -71,7 +71,7 @@ FILELINE * docsection;
static char *srctree, *kernsrctree;
-void usage (void)
+static void usage (void)
{
fprintf(stderr, "Usage: docproc {doc|depend} file\n");
fprintf(stderr, "Input is read from file.tmpl. Output is sent to stdout\n");
@@ -84,7 +84,7 @@ void usage (void)
/*
* Execute kernel-doc with parameters given in svec
*/
-void exec_kernel_doc(char **svec)
+static void exec_kernel_doc(char **svec)
{
pid_t pid;
int ret;
@@ -129,7 +129,7 @@ struct symfile
struct symfile symfilelist[MAXFILES];
int symfilecnt = 0;
-void add_new_symbol(struct symfile *sym, char * symname)
+static void add_new_symbol(struct symfile *sym, char * symname)
{
sym->symbollist =
realloc(sym->symbollist, (sym->symbolcnt + 1) * sizeof(char *));
@@ -137,14 +137,14 @@ void add_new_symbol(struct symfile *sym, char * symname)
}
/* Add a filename to the list */
-struct symfile * add_new_file(char * filename)
+static struct symfile * add_new_file(char * filename)
{
symfilelist[symfilecnt++].filename = strdup(filename);
return &symfilelist[symfilecnt - 1];
}
/* Check if file already are present in the list */
-struct symfile * filename_exist(char * filename)
+static struct symfile * filename_exist(char * filename)
{
int i;
for (i=0; i < symfilecnt; i++)
@@ -157,20 +157,20 @@ struct symfile * filename_exist(char * filename)
* List all files referenced within the template file.
* Files are separated by tabs.
*/
-void adddep(char * file) { printf("\t%s", file); }
-void adddep2(char * file, char * line) { line = line; adddep(file); }
-void noaction(char * line) { line = line; }
-void noaction2(char * file, char * line) { file = file; line = line; }
+static void adddep(char * file) { printf("\t%s", file); }
+static void adddep2(char * file, char * line) { line = line; adddep(file); }
+static void noaction(char * line) { line = line; }
+static void noaction2(char * file, char * line) { file = file; line = line; }
/* Echo the line without further action */
-void printline(char * line) { printf("%s", line); }
+static void printline(char * line) { printf("%s", line); }
/*
* Find all symbols in filename that are exported with EXPORT_SYMBOL &
* EXPORT_SYMBOL_GPL (& EXPORT_SYMBOL_GPL_FUTURE implicitly).
* All symbols located are stored in symfilelist.
*/
-void find_export_symbols(char * filename)
+static void find_export_symbols(char * filename)
{
FILE * fp;
struct symfile *sym;
@@ -227,7 +227,7 @@ void find_export_symbols(char * filename)
* intfunc uses -nofunction
* extfunc uses -function
*/
-void docfunctions(char * filename, char * type)
+static void docfunctions(char * filename, char * type)
{
int i,j;
int symcnt = 0;
@@ -258,15 +258,15 @@ void docfunctions(char * filename, char * type)
fflush(stdout);
free(vec);
}
-void intfunc(char * filename) { docfunctions(filename, NOFUNCTION); }
-void extfunc(char * filename) { docfunctions(filename, FUNCTION); }
+static void intfunc(char * filename) { docfunctions(filename, NOFUNCTION); }
+static void extfunc(char * filename) { docfunctions(filename, FUNCTION); }
/*
* Document specific function(s) in a file.
* Call kernel-doc with the following parameters:
* kernel-doc -docbook -function function1 [-function function2]
*/
-void singfunc(char * filename, char * line)
+static void singfunc(char * filename, char * line)
{
char *vec[200]; /* Enough for specific functions */
int i, idx = 0;
@@ -297,7 +297,7 @@ void singfunc(char * filename, char * line)
* Call kernel-doc with the following parameters:
* kernel-doc -docbook -function "doc section" filename
*/
-void docsect(char *filename, char *line)
+static void docsect(char *filename, char *line)
{
char *vec[6]; /* kerneldoc -docbook -function "section" file NULL */
char *s;
@@ -324,7 +324,7 @@ void docsect(char *filename, char *line)
* 5) Lines containing !P
* 6) Default lines - lines not matching the above
*/
-void parse_file(FILE *infile)
+static void parse_file(FILE *infile)
{
char line[MAXLINESZ];
char * s;
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 8ab44861168..6bf21f83837 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -124,7 +124,7 @@ char *target;
char *depfile;
char *cmdline;
-void usage(void)
+static void usage(void)
{
fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n");
exit(1);
@@ -133,7 +133,7 @@ void usage(void)
/*
* Print out the commandline prefixed with cmd_<target filename> :=
*/
-void print_cmdline(void)
+static void print_cmdline(void)
{
printf("cmd_%s := %s\n\n", target, cmdline);
}
@@ -146,7 +146,7 @@ int len_config = 0;
* Grow the configuration string to a desired length.
* Usually the first growth is plenty.
*/
-void grow_config(int len)
+static void grow_config(int len)
{
while (len_config + len > size_config) {
if (size_config == 0)
@@ -162,7 +162,7 @@ void grow_config(int len)
/*
* Lookup a value in the configuration string.
*/
-int is_defined_config(const char * name, int len)
+static int is_defined_config(const char * name, int len)
{
const char * pconfig;
const char * plast = str_config + len_config - len;
@@ -178,7 +178,7 @@ int is_defined_config(const char * name, int len)
/*
* Add a new value to the configuration string.
*/
-void define_config(const char * name, int len)
+static void define_config(const char * name, int len)
{
grow_config(len + 1);
@@ -190,7 +190,7 @@ void define_config(const char * name, int len)
/*
* Clear the set of configuration strings.
*/
-void clear_config(void)
+static void clear_config(void)
{
len_config = 0;
define_config("", 0);
@@ -199,7 +199,7 @@ void clear_config(void)
/*
* Record the use of a CONFIG_* word.
*/
-void use_config(char *m, int slen)
+static void use_config(char *m, int slen)
{
char s[PATH_MAX];
char *p;
@@ -220,7 +220,7 @@ void use_config(char *m, int slen)
printf(" $(wildcard include/config/%s.h) \\\n", s);
}
-void parse_config_file(char *map, size_t len)
+static void parse_config_file(char *map, size_t len)
{
int *end = (int *) (map + len);
/* start at +1, so that p can never be < map */
@@ -254,7 +254,7 @@ void parse_config_file(char *map, size_t len)
}
/* test is s ends in sub */
-int strrcmp(char *s, char *sub)
+static int strrcmp(char *s, char *sub)
{
int slen = strlen(s);
int sublen = strlen(sub);
@@ -265,7 +265,7 @@ int strrcmp(char *s, char *sub)
return memcmp(s + slen - sublen, sub, sublen);
}
-void do_config_file(char *filename)
+static void do_config_file(char *filename)
{
struct stat st;
int fd;
@@ -296,7 +296,7 @@ void do_config_file(char *filename)
close(fd);
}
-void parse_dep_file(void *map, size_t len)
+static void parse_dep_file(void *map, size_t len)
{
char *m = map;
char *end = m + len;
@@ -336,7 +336,7 @@ void parse_dep_file(void *map, size_t len)
printf("$(deps_%s):\n", target);
}
-void print_deps(void)
+static void print_deps(void)
{
struct stat st;
int fd;
@@ -368,7 +368,7 @@ void print_deps(void)
close(fd);
}
-void traps(void)
+static void traps(void)
{
static char test[] __attribute__((aligned(sizeof(int)))) = "CONF";
int *p = (int *)test;
diff --git a/scripts/basic/hash.c b/scripts/basic/hash.c
index 3299ad7fc8c..2ef5d3f666b 100644
--- a/scripts/basic/hash.c
+++ b/scripts/basic/hash.c
@@ -21,7 +21,7 @@ static void usage(void)
* http://www.cse.yorku.ca/~oz/hash.html
*/
-unsigned int djb2_hash(char *str)
+static unsigned int djb2_hash(char *str)
{
unsigned long hash = 5381;
int c;
@@ -34,7 +34,7 @@ unsigned int djb2_hash(char *str)
return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1));
}
-unsigned int r5_hash(char *str)
+static unsigned int r5_hash(char *str)
{
unsigned long hash = 0;
int c;
diff --git a/scripts/checkincludes.pl b/scripts/checkincludes.pl
index 8e6b716c191..676ddc07d6f 100755
--- a/scripts/checkincludes.pl
+++ b/scripts/checkincludes.pl
@@ -1,24 +1,85 @@
#!/usr/bin/perl
#
-# checkincludes: Find files included more than once in (other) files.
+# checkincludes: find/remove files included more than once
+#
# Copyright abandoned, 2000, Niels Kristian Bech Jensen <nkbj@image.dk>.
+# Copyright 2009 Luis R. Rodriguez <mcgrof@gmail.com>
+#
+# This script checks for duplicate includes. It also has support
+# to remove them in place. Note that this will not take into
+# consideration macros so you should run this only if you know
+# you do have real dups and do not have them under #ifdef's. You
+# could also just review the results.
+
+sub usage {
+ print "Usage: checkincludes.pl [-r]\n";
+ print "By default we just warn of duplicates\n";
+ print "To remove duplicated includes in place use -r\n";
+ exit 1;
+}
+
+my $remove = 0;
+
+if ($#ARGV < 0) {
+ usage();
+}
+
+if ($#ARGV >= 1) {
+ if ($ARGV[0] =~ /^-/) {
+ if ($ARGV[0] eq "-r") {
+ $remove = 1;
+ shift;
+ } else {
+ usage();
+ }
+ }
+}
foreach $file (@ARGV) {
open(FILE, $file) or die "Cannot open $file: $!.\n";
my %includedfiles = ();
+ my @file_lines = ();
while (<FILE>) {
if (m/^\s*#\s*include\s*[<"](\S*)[>"]/o) {
++$includedfiles{$1};
}
+ push(@file_lines, $_);
}
-
- foreach $filename (keys %includedfiles) {
- if ($includedfiles{$filename} > 1) {
- print "$file: $filename is included more than once.\n";
+
+ close(FILE);
+
+ if (!$remove) {
+ foreach $filename (keys %includedfiles) {
+ if ($includedfiles{$filename} > 1) {
+ print "$file: $filename is included more than once.\n";
+ }
}
+ next;
}
+ open(FILE,">$file") || die("Cannot write to $file: $!");
+
+ my $dups = 0;
+ foreach (@file_lines) {
+ if (m/^\s*#\s*include\s*[<"](\S*)[>"]/o) {
+ foreach $filename (keys %includedfiles) {
+ if ($1 eq $filename) {
+ if ($includedfiles{$filename} > 1) {
+ $includedfiles{$filename}--;
+ $dups++;
+ } else {
+ print FILE $_;
+ }
+ }
+ }
+ } else {
+ print FILE $_;
+ }
+ }
+ if ($dups > 0) {
+ print "$file: removed $dups duplicate includes\n";
+ }
close(FILE);
}
diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index 3baaaecd6b1..9960d1c303f 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -38,14 +38,14 @@ static int conf_cnt;
static char line[128];
static struct menu *rootEntry;
-static char nohelp_text[] = N_("Sorry, no help available for this option yet.\n");
-
-static const char *get_help(struct menu *menu)
+static void print_help(struct menu *menu)
{
- if (menu_has_help(menu))
- return _(menu_get_help(menu));
- else
- return nohelp_text;
+ struct gstr help = str_new();
+
+ menu_get_ext_help(menu, &help);
+
+ printf("\n%s\n", str_get(&help));
+ str_free(&help);
}
static void strip(char *str)
@@ -121,7 +121,7 @@ static int conf_askvalue(struct symbol *sym, const char *def)
return 1;
}
-int conf_string(struct menu *menu)
+static int conf_string(struct menu *menu)
{
struct symbol *sym = menu->sym;
const char *def;
@@ -140,7 +140,7 @@ int conf_string(struct menu *menu)
case '?':
/* print help */
if (line[1] == '\n') {
- printf("\n%s\n", get_help(menu));
+ print_help(menu);
def = NULL;
break;
}
@@ -220,7 +220,7 @@ static int conf_sym(struct menu *menu)
if (sym_set_tristate_value(sym, newval))
return 0;
help:
- printf("\n%s\n", get_help(menu));
+ print_help(menu);
}
}
@@ -307,7 +307,7 @@ static int conf_choice(struct menu *menu)
fgets(line, 128, stdin);
strip(line);
if (line[0] == '?') {
- printf("\n%s\n", get_help(menu));
+ print_help(menu);
continue;
}
if (!line[0])
@@ -331,7 +331,7 @@ static int conf_choice(struct menu *menu)
if (!child)
continue;
if (line[strlen(line) - 1] == '?') {
- printf("\n%s\n", get_help(child));
+ print_help(child);
continue;
}
sym_set_choice_value(sym, child->sym);
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index a04da3459f0..b55e72ff2fc 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -560,7 +560,7 @@ int conf_write(const char *name)
return 0;
}
-int conf_split_config(void)
+static int conf_split_config(void)
{
const char *name;
char path[128];
diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c
index 579ece4fa58..edd3f39a080 100644
--- a/scripts/kconfig/expr.c
+++ b/scripts/kconfig/expr.c
@@ -348,7 +348,7 @@ struct expr *expr_trans_bool(struct expr *e)
/*
* e1 || e2 -> ?
*/
-struct expr *expr_join_or(struct expr *e1, struct expr *e2)
+static struct expr *expr_join_or(struct expr *e1, struct expr *e2)
{
struct expr *tmp;
struct symbol *sym1, *sym2;
@@ -412,7 +412,7 @@ struct expr *expr_join_or(struct expr *e1, struct expr *e2)
return NULL;
}
-struct expr *expr_join_and(struct expr *e1, struct expr *e2)
+static struct expr *expr_join_and(struct expr *e1, struct expr *e2)
{
struct expr *tmp;
struct symbol *sym1, *sym2;
@@ -1098,6 +1098,8 @@ void expr_fprint(struct expr *e, FILE *out)
static void expr_print_gstr_helper(void *data, struct symbol *sym, const char *str)
{
str_append((struct gstr*)data, str);
+ if (sym)
+ str_printf((struct gstr*)data, " [=%s]", sym_get_string_value(sym));
}
void expr_gstr_print(struct expr *e, struct gstr *gs)
diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c
index 199b22bb49e..65464366fe3 100644
--- a/scripts/kconfig/gconf.c
+++ b/scripts/kconfig/gconf.c
@@ -456,19 +456,9 @@ static void text_insert_help(struct menu *menu)
GtkTextBuffer *buffer;
GtkTextIter start, end;
const char *prompt = _(menu_get_prompt(menu));
- gchar *name;
- const char *help;
+ struct gstr help = str_new();
- help = menu_get_help(menu);
-
- /* Gettextize if the help text not empty */
- if ((help != 0) && (help[0] != 0))
- help = _(help);
-
- if (menu->sym && menu->sym->name)
- name = g_strdup_printf(menu->sym->name);
- else
- name = g_strdup("");
+ menu_get_ext_help(menu, &help);
buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w));
gtk_text_buffer_get_bounds(buffer, &start, &end);
@@ -478,14 +468,11 @@ static void text_insert_help(struct menu *menu)
gtk_text_buffer_get_end_iter(buffer, &end);
gtk_text_buffer_insert_with_tags(buffer, &end, prompt, -1, tag1,
NULL);
- gtk_text_buffer_insert_at_cursor(buffer, " ", 1);
- gtk_text_buffer_get_end_iter(buffer, &end);
- gtk_text_buffer_insert_with_tags(buffer, &end, name, -1, tag1,
- NULL);
gtk_text_buffer_insert_at_cursor(buffer, "\n\n", 2);
gtk_text_buffer_get_end_iter(buffer, &end);
- gtk_text_buffer_insert_with_tags(buffer, &end, help, -1, tag2,
+ gtk_text_buffer_insert_with_tags(buffer, &end, str_get(&help), -1, tag2,
NULL);
+ str_free(&help);
}
diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade
index 803233fdd6d..b1c86c19292 100644
--- a/scripts/kconfig/gconf.glade
+++ b/scripts/kconfig/gconf.glade
@@ -547,7 +547,7 @@
<property name="headers_visible">True</property>
<property name="rules_hint">False</property>
<property name="reorderable">False</property>
- <property name="enable_search">True</property>
+ <property name="enable_search">False</property>
<signal name="cursor_changed" handler="on_treeview2_cursor_changed" last_modification_time="Sun, 12 Jan 2003 15:58:22 GMT"/>
<signal name="button_press_event" handler="on_treeview1_button_press_event" last_modification_time="Sun, 12 Jan 2003 16:03:52 GMT"/>
<signal name="key_press_event" handler="on_treeview2_key_press_event" last_modification_time="Sun, 12 Jan 2003 16:11:44 GMT"/>
@@ -582,7 +582,7 @@
<property name="headers_visible">True</property>
<property name="rules_hint">False</property>
<property name="reorderable">False</property>
- <property name="enable_search">True</property>
+ <property name="enable_search">False</property>
<signal name="cursor_changed" handler="on_treeview2_cursor_changed" last_modification_time="Sun, 12 Jan 2003 15:57:55 GMT"/>
<signal name="button_press_event" handler="on_treeview2_button_press_event" last_modification_time="Sun, 12 Jan 2003 15:57:58 GMT"/>
<signal name="key_press_event" handler="on_treeview2_key_press_event" last_modification_time="Sun, 12 Jan 2003 15:58:01 GMT"/>
diff --git a/scripts/kconfig/kxgettext.c b/scripts/kconfig/kxgettext.c
index 8d9ce22b0fc..dcc3fcc0cc9 100644
--- a/scripts/kconfig/kxgettext.c
+++ b/scripts/kconfig/kxgettext.c
@@ -166,7 +166,7 @@ static int message__add(const char *msg, char *option, char *file, int lineno)
return rc;
}
-void menu_build_message_list(struct menu *menu)
+static void menu_build_message_list(struct menu *menu)
{
struct menu *child;
@@ -211,7 +211,7 @@ static void message__print_gettext_msgid_msgstr(struct message *self)
"msgstr \"\"\n", self->msg);
}
-void menu__xgettext(void)
+static void menu__xgettext(void)
{
struct message *m = message__list;
diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h
index 8e69461313d..ffeb532b2cf 100644
--- a/scripts/kconfig/lkc_proto.h
+++ b/scripts/kconfig/lkc_proto.h
@@ -17,6 +17,8 @@ P(menu_get_root_menu,struct menu *,(struct menu *menu));
P(menu_get_parent_menu,struct menu *,(struct menu *menu));
P(menu_has_help,bool,(struct menu *menu));
P(menu_get_help,const char *,(struct menu *menu));
+P(get_symbol_str,void,(struct gstr *r, struct symbol *sym));
+P(menu_get_ext_help,void,(struct menu *menu, struct gstr *help));
/* symbol.c */
P(symbol_hash,struct symbol *,[SYMBOL_HASHSIZE]);
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 25b60bc117f..d8295357358 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -199,8 +199,6 @@ inputbox_instructions_string[] = N_(
setmod_text[] = N_(
"This feature depends on another which has been configured as a module.\n"
"As a result, this feature will be built as a module."),
-nohelp_text[] = N_(
- "There is no help available for this kernel option.\n"),
load_config_text[] = N_(
"Enter the name of the configuration file you wish to load. "
"Accept the name shown to restore the configuration you "
@@ -284,66 +282,6 @@ static void show_textbox(const char *title, const char *text, int r, int c);
static void show_helptext(const char *title, const char *text);
static void show_help(struct menu *menu);
-static void get_prompt_str(struct gstr *r, struct property *prop)
-{
- int i, j;
- struct menu *submenu[8], *menu;
-
- str_printf(r, _("Prompt: %s\n"), _(prop->text));
- str_printf(r, _(" Defined at %s:%d\n"), prop->menu->file->name,
- prop->menu->lineno);
- if (!expr_is_yes(prop->visible.expr)) {
- str_append(r, _(" Depends on: "));
- expr_gstr_print(prop->visible.expr, r);
- str_append(r, "\n");
- }
- menu = prop->menu->parent;
- for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent)
- submenu[i++] = menu;
- if (i > 0) {
- str_printf(r, _(" Location:\n"));
- for (j = 4; --i >= 0; j += 2) {
- menu = submenu[i];
- str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu)));
- if (menu->sym) {
- str_printf(r, " (%s [=%s])", menu->sym->name ?
- menu->sym->name : _("<choice>"),
- sym_get_string_value(menu->sym));
- }
- str_append(r, "\n");
- }
- }
-}
-
-static void get_symbol_str(struct gstr *r, struct symbol *sym)
-{
- bool hit;
- struct property *prop;
-
- if (sym && sym->name)
- str_printf(r, "Symbol: %s [=%s]\n", sym->name,
- sym_get_string_value(sym));
- for_all_prompts(sym, prop)
- get_prompt_str(r, prop);
- hit = false;
- for_all_properties(sym, prop, P_SELECT) {
- if (!hit) {
- str_append(r, " Selects: ");
- hit = true;
- } else
- str_printf(r, " && ");
- expr_gstr_print(prop->expr, r);
- }
- if (hit)
- str_append(r, "\n");
- if (sym->rev_dep.expr) {
- str_append(r, _(" Selected by: "));
- expr_gstr_print(sym->rev_dep.expr, r);
- str_append(r, "\n");
- }
- str_append(r, "\n\n");
-}
-
static struct gstr get_relations_str(struct symbol **sym_arr)
{
struct symbol *sym;
@@ -699,19 +637,9 @@ static void show_helptext(const char *title, const char *text)
static void show_help(struct menu *menu)
{
struct gstr help = str_new();
- struct symbol *sym = menu->sym;
-
- if (menu_has_help(menu))
- {
- if (sym->name) {
- str_printf(&help, "CONFIG_%s:\n\n", sym->name);
- str_append(&help, _(menu_get_help(menu)));
- str_append(&help, "\n");
- }
- } else {
- str_append(&help, nohelp_text);
- }
- get_symbol_str(&help, sym);
+
+ menu_get_ext_help(menu, &help);
+
show_helptext(_(menu_get_prompt(menu)), str_get(&help));
str_free(&help);
}
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index 07ff8d105c9..059a2465c57 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -9,6 +9,9 @@
#define LKC_DIRECT_LINK
#include "lkc.h"
+static const char nohelp_text[] = N_(
+ "There is no help available for this kernel option.\n");
+
struct menu rootmenu;
static struct menu **last_entry_ptr;
@@ -74,7 +77,7 @@ void menu_end_menu(void)
current_menu = current_menu->parent;
}
-struct expr *menu_check_dep(struct expr *e)
+static struct expr *menu_check_dep(struct expr *e)
{
if (!e)
return e;
@@ -184,7 +187,7 @@ static int menu_range_valid_sym(struct symbol *sym, struct symbol *sym2)
(sym2->type == S_UNKNOWN && sym_string_valid(sym, sym2->name));
}
-void sym_check_prop(struct symbol *sym)
+static void sym_check_prop(struct symbol *sym)
{
struct property *prop;
struct symbol *sym2;
@@ -451,3 +454,80 @@ const char *menu_get_help(struct menu *menu)
else
return "";
}
+
+static void get_prompt_str(struct gstr *r, struct property *prop)
+{
+ int i, j;
+ struct menu *submenu[8], *menu;
+
+ str_printf(r, _("Prompt: %s\n"), _(prop->text));
+ str_printf(r, _(" Defined at %s:%d\n"), prop->menu->file->name,
+ prop->menu->lineno);
+ if (!expr_is_yes(prop->visible.expr)) {
+ str_append(r, _(" Depends on: "));
+ expr_gstr_print(prop->visible.expr, r);
+ str_append(r, "\n");
+ }
+ menu = prop->menu->parent;
+ for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent)
+ submenu[i++] = menu;
+ if (i > 0) {
+ str_printf(r, _(" Location:\n"));
+ for (j = 4; --i >= 0; j += 2) {
+ menu = submenu[i];
+ str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu)));
+ if (menu->sym) {
+ str_printf(r, " (%s [=%s])", menu->sym->name ?
+ menu->sym->name : _("<choice>"),
+ sym_get_string_value(menu->sym));
+ }
+ str_append(r, "\n");
+ }
+ }
+}
+
+void get_symbol_str(struct gstr *r, struct symbol *sym)
+{
+ bool hit;
+ struct property *prop;
+
+ if (sym && sym->name)
+ str_printf(r, "Symbol: %s [=%s]\n", sym->name,
+ sym_get_string_value(sym));
+ for_all_prompts(sym, prop)
+ get_prompt_str(r, prop);
+ hit = false;
+ for_all_properties(sym, prop, P_SELECT) {
+ if (!hit) {
+ str_append(r, " Selects: ");
+ hit = true;
+ } else
+ str_printf(r, " && ");
+ expr_gstr_print(prop->expr, r);
+ }
+ if (hit)
+ str_append(r, "\n");
+ if (sym->rev_dep.expr) {
+ str_append(r, _(" Selected by: "));
+ expr_gstr_print(sym->rev_dep.expr, r);
+ str_append(r, "\n");
+ }
+ str_append(r, "\n\n");
+}
+
+void menu_get_ext_help(struct menu *menu, struct gstr *help)
+{
+ struct symbol *sym = menu->sym;
+
+ if (menu_has_help(menu)) {
+ if (sym->name) {
+ str_printf(help, "CONFIG_%s:\n\n", sym->name);
+ str_append(help, _(menu_get_help(menu)));
+ str_append(help, "\n");
+ }
+ } else {
+ str_append(help, nohelp_text);
+ }
+ if (sym)
+ get_symbol_str(help, sym);
+}
diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc
index ce7d508c752..00c51507cfc 100644
--- a/scripts/kconfig/qconf.cc
+++ b/scripts/kconfig/qconf.cc
@@ -1042,12 +1042,10 @@ void ConfigInfoView::menuInfo(void)
if (showDebug())
debug = debug_info(sym);
- help = menu_get_help(menu);
- /* Gettextize if the help text not empty */
- if (help.isEmpty())
- help = print_filter(menu_get_help(menu));
- else
- help = print_filter(_(menu_get_help(menu)));
+ struct gstr help_gstr = str_new();
+ menu_get_ext_help(menu, &help_gstr);
+ help = print_filter(str_get(&help_gstr));
+ str_free(&help_gstr);
} else if (menu->prompt) {
head += "<big><b>";
head += print_filter(_(menu->prompt->text));
diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
index 18f3e5c3363..6c8fbbb66eb 100644
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -36,7 +36,7 @@ tristate modules_val;
struct expr *sym_env_list;
-void sym_add_default(struct symbol *sym, const char *def)
+static void sym_add_default(struct symbol *sym, const char *def)
{
struct property *prop = prop_alloc(P_DEFAULT, sym);
@@ -125,7 +125,7 @@ struct property *sym_get_default_prop(struct symbol *sym)
return NULL;
}
-struct property *sym_get_range_prop(struct symbol *sym)
+static struct property *sym_get_range_prop(struct symbol *sym)
{
struct property *prop;
@@ -943,7 +943,7 @@ const char *prop_get_type_name(enum prop_type type)
return "unknown";
}
-void prop_add_env(const char *env)
+static void prop_add_env(const char *env)
{
struct symbol *sym, *sym2;
struct property *prop;
diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl
index 89774011965..5f0fcb712e2 100644
--- a/scripts/markup_oops.pl
+++ b/scripts/markup_oops.pl
@@ -184,10 +184,7 @@ if ($target eq "0") {
# if it's a module, we need to find the .ko file and calculate a load offset
if ($module ne "") {
- my $dir = dirname($filename);
- $dir = $dir . "/";
- my $mod = $module . ".ko";
- my $modulefile = `find $dir -name $mod | head -1`;
+ my $modulefile = `modinfo $module | grep '^filename:' | awk '{ print \$2 }'`;
chomp($modulefile);
$filename = $modulefile;
if ($filename eq "") {
diff --git a/scripts/tags.sh b/scripts/tags.sh
index 4a34ec591e8..d52f7a01557 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -101,7 +101,8 @@ exuberant()
-I ____cacheline_aligned_in_smp \
-I ____cacheline_internodealigned_in_smp \
-I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL \
- --extra=+f --c-kinds=+px \
+ -I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
+ --extra=+f --c-kinds=-px \
--regex-asm='/^ENTRY\(([^)]*)\).*/\1/' \
--regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/'
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index b8186bac8b7..6cf8fd2b79e 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -61,7 +61,8 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
struct cgroup_subsys devices_subsys;
static int devcgroup_can_attach(struct cgroup_subsys *ss,
- struct cgroup *new_cgroup, struct task_struct *task)
+ struct cgroup *new_cgroup, struct task_struct *task,
+ bool threadgroup)
{
if (current != task && !capable(CAP_SYS_ADMIN))
return -EPERM;
diff --git a/security/keys/gc.c b/security/keys/gc.c
index 485fc6233c3..4770be375ff 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -169,9 +169,9 @@ static void key_garbage_collector(struct work_struct *work)
/* trawl through the keys looking for keyrings */
for (;;) {
- if (key->expiry > now && key->expiry < new_timer) {
+ if (key->expiry > limit && key->expiry < new_timer) {
kdebug("will expire %x in %ld",
- key_serial(key), key->expiry - now);
+ key_serial(key), key->expiry - limit);
new_timer = key->expiry;
}
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 500aad0ebd6..3bb90b6f1dd 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -187,7 +187,7 @@ static inline void print_ipv6_addr(struct audit_buffer *ab,
char *name1, char *name2)
{
if (!ipv6_addr_any(addr))
- audit_log_format(ab, " %s=%pI6", name1, addr);
+ audit_log_format(ab, " %s=%pI6c", name1, addr);
if (port)
audit_log_format(ab, " %s=%d", name2, ntohs(port));
}
diff --git a/security/min_addr.c b/security/min_addr.c
index 14cc7b3b8d0..c844eed7915 100644
--- a/security/min_addr.c
+++ b/security/min_addr.c
@@ -28,12 +28,12 @@ static void update_mmap_min_addr(void)
* sysctl handler which just sets dac_mmap_min_addr = the new value and then
* calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly
*/
-int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp,
+int mmap_min_addr_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
- ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
update_mmap_min_addr();
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 1ed0f076aad..b4b5da1c0a4 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -868,8 +868,19 @@ u32 avc_policy_seqno(void)
void avc_disable(void)
{
- avc_flush();
- synchronize_rcu();
- if (avc_node_cachep)
- kmem_cache_destroy(avc_node_cachep);
+ /*
+ * If you are looking at this because you have realized that we are
+ * not destroying the avc_node_cachep it might be easy to fix, but
+ * I don't know the memory barrier semantics well enough to know. It's
+ * possible that some other task dereferenced security_ops when
+ * it still pointed to selinux operations. If that is the case it's
+ * possible that it is about to use the avc and is about to need the
+ * avc_node_cachep. I know I could wrap the security.c security_ops call
+ * in an rcu_lock, but seriously, it's not worth it. Instead I just flush
+ * the cache and get that memory back.
+ */
+ if (avc_node_cachep) {
+ avc_flush();
+ /* kmem_cache_destroy(avc_node_cachep); */
+ }
}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 417f7c99452..bb230d5d708 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2411,7 +2411,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
/* Wake up the parent if it is waiting so that it can recheck
* wait permission to the new task SID. */
read_lock(&tasklist_lock);
- wake_up_interruptible(&current->real_parent->signal->wait_chldexit);
+ __wake_up_parent(current, current->real_parent);
read_unlock(&tasklist_lock);
}
diff --git a/usr/.gitignore b/usr/.gitignore
index 69b2e89fa16..8e48117a3f3 100644
--- a/usr/.gitignore
+++ b/usr/.gitignore
@@ -4,5 +4,7 @@
gen_init_cpio
initramfs_data.cpio
initramfs_data.cpio.gz
+initramfs_data.cpio.bz2
+initramfs_data.cpio.lzma
initramfs_list
include
diff --git a/usr/Makefile b/usr/Makefile
index 245145a99c1..1e6a9e4a72c 100644
--- a/usr/Makefile
+++ b/usr/Makefile
@@ -6,7 +6,7 @@ klibcdirs:;
PHONY += klibcdirs
-# Gzip, but no bzip2
+# Gzip
suffix_$(CONFIG_INITRAMFS_COMPRESSION_GZIP) = .gz
# Bzip2
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 897bff3b7df..034a798b043 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -738,8 +738,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
bool called = true;
struct kvm_vcpu *vcpu;
- if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
- cpumask_clear(cpus);
+ zalloc_cpumask_var(&cpus, GFP_ATOMIC);
spin_lock(&kvm->requests_lock);
me = smp_processor_id();