-rw-r--r--  .mailmap  1
-rw-r--r--  Documentation/cgroups/cgroups.txt (renamed from Documentation/cgroups.txt)  0
-rw-r--r--  Documentation/cgroups/freezer-subsystem.txt  99
-rw-r--r--  Documentation/controllers/memory.txt  24
-rw-r--r--  Documentation/cpusets.txt  2
-rw-r--r--  Documentation/filesystems/ext3.txt  5
-rw-r--r--  Documentation/filesystems/proc.txt  28
-rw-r--r--  Documentation/kernel-parameters.txt  2
-rw-r--r--  Documentation/mtd/nand_ecc.txt  714
-rw-r--r--  Documentation/sysrq.txt  3
-rw-r--r--  Documentation/vm/unevictable-lru.txt  615
-rw-r--r--  arch/alpha/Kconfig  1
-rw-r--r--  arch/alpha/include/asm/thread_info.h  2
-rw-r--r--  arch/alpha/kernel/core_marvel.c  4
-rw-r--r--  arch/alpha/kernel/time.c  18
-rw-r--r--  arch/arm/Kconfig  2
-rw-r--r--  arch/arm/mach-pxa/include/mach/pxa3xx_nand.h  44
-rw-r--r--  arch/arm/plat-mxc/include/mach/mxc_nand.h  27
-rw-r--r--  arch/arm/plat-omap/include/mach/onenand.h  6
-rw-r--r--  arch/avr32/Kconfig  2
-rw-r--r--  arch/avr32/include/asm/thread_info.h  1
-rw-r--r--  arch/blackfin/Kconfig  3
-rw-r--r--  arch/cris/Kconfig  2
-rw-r--r--  arch/cris/arch-v10/drivers/ds1302.c  24
-rw-r--r--  arch/cris/arch-v10/drivers/pcf8563.c  24
-rw-r--r--  arch/cris/arch-v32/drivers/pcf8563.c  24
-rw-r--r--  arch/cris/kernel/time.c  18
-rw-r--r--  arch/frv/Kconfig  2
-rw-r--r--  arch/h8300/Kconfig  2
-rw-r--r--  arch/h8300/include/asm/thread_info.h  2
-rw-r--r--  arch/ia64/Kconfig  2
-rw-r--r--  arch/ia64/hp/common/sba_iommu.c  5
-rw-r--r--  arch/ia64/kernel/crash_dump.c  4
-rw-r--r--  arch/ia64/kernel/efi.c  2
-rw-r--r--  arch/ia64/kernel/setup.c  13
-rw-r--r--  arch/ia64/mm/init.c  17
-rw-r--r--  arch/m32r/Kconfig  2
-rw-r--r--  arch/m68k/Kconfig  2
-rw-r--r--  arch/m68k/bvme6000/rtc.c  1
-rw-r--r--  arch/m68knommu/Kconfig  2
-rw-r--r--  arch/m68knommu/include/asm/thread_info.h  2
-rw-r--r--  arch/mips/Kconfig  2
-rw-r--r--  arch/mips/dec/time.c  18
-rw-r--r--  arch/mips/include/asm/mc146818-time.h  18
-rw-r--r--  arch/mips/pmc-sierra/yosemite/setup.c  30
-rw-r--r--  arch/mips/sibyte/swarm/rtc_m41t81.c  26
-rw-r--r--  arch/mips/sibyte/swarm/rtc_xicor1241.c  26
-rw-r--r--  arch/mn10300/Kconfig  2
-rw-r--r--  arch/mn10300/kernel/rtc.c  6
-rw-r--r--  arch/parisc/Kconfig  2
-rw-r--r--  arch/powerpc/Kconfig  2
-rw-r--r--  arch/powerpc/include/asm/ps3av.h  3
-rw-r--r--  arch/powerpc/kernel/crash_dump.c  10
-rw-r--r--  arch/powerpc/mm/mem.c  17
-rw-r--r--  arch/s390/Kconfig  2
-rw-r--r--  arch/s390/include/asm/thread_info.h  2
-rw-r--r--  arch/s390/mm/init.c  11
-rw-r--r--  arch/sh/Kconfig  2
-rw-r--r--  arch/sh/kernel/crash_dump.c  3
-rw-r--r--  arch/sparc/Kconfig  2
-rw-r--r--  arch/sparc/include/asm/thread_info_32.h  2
-rw-r--r--  arch/sparc/include/asm/thread_info_64.h  2
-rw-r--r--  arch/sparc64/Kconfig  1
-rw-r--r--  arch/um/Kconfig  2
-rw-r--r--  arch/um/sys-i386/signal.c  3
-rw-r--r--  arch/x86/Kconfig  1
-rw-r--r--  arch/x86/kernel/crash_dump_32.c  3
-rw-r--r--  arch/x86/kernel/crash_dump_64.c  3
-rw-r--r--  arch/x86/kernel/rtc.c  20
-rw-r--r--  arch/x86/kernel/setup.c  8
-rw-r--r--  arch/x86/mm/pageattr.c  2
-rw-r--r--  arch/x86/xen/enlighten.c  1
-rw-r--r--  arch/x86/xen/mmu.c  1
-rw-r--r--  arch/xtensa/Kconfig  1
-rw-r--r--  drivers/acpi/battery.c  2
-rw-r--r--  drivers/acpi/sbs.c  2
-rw-r--r--  drivers/acpi/sleep/proc.c  18
-rw-r--r--  drivers/acpi/system.c  1
-rw-r--r--  drivers/base/memory.c  4
-rw-r--r--  drivers/base/node.c  69
-rw-r--r--  drivers/block/aoe/aoeblk.c  2
-rw-r--r--  drivers/block/nbd.c  2
-rw-r--r--  drivers/char/ds1286.c  34
-rw-r--r--  drivers/char/ds1302.c  24
-rw-r--r--  drivers/char/ip27-rtc.c  24
-rw-r--r--  drivers/char/pc8736x_gpio.c  11
-rw-r--r--  drivers/char/rtc.c  40
-rw-r--r--  drivers/char/sx.c  6
-rw-r--r--  drivers/char/sysrq.c  2
-rw-r--r--  drivers/char/tpm/tpm.c  2
-rw-r--r--  drivers/edac/cell_edac.c  2
-rw-r--r--  drivers/firmware/iscsi_ibft.c  1
-rw-r--r--  drivers/gpio/gpiolib.c  4
-rw-r--r--  drivers/hwmon/applesmc.c  109
-rw-r--r--  drivers/hwmon/pc87360.c  248
-rw-r--r--  drivers/i2c/chips/at24.c  1
-rw-r--r--  drivers/i2c/chips/ds1682.c  1
-rw-r--r--  drivers/i2c/chips/menelaus.c  34
-rw-r--r--  drivers/infiniband/core/cm.c  2
-rw-r--r--  drivers/media/dvb/ttpci/av7110.c  2
-rw-r--r--  drivers/media/video/cx18/cx18-driver.h  1
-rw-r--r--  drivers/media/video/ivtv/ivtv-driver.h  1
-rw-r--r--  drivers/memstick/core/mspro_block.c  1
-rw-r--r--  drivers/misc/hp-wmi.c  1
-rw-r--r--  drivers/mtd/Kconfig  5
-rw-r--r--  drivers/mtd/chips/Kconfig  4
-rw-r--r--  drivers/mtd/chips/cfi_cmdset_0001.c  71
-rw-r--r--  drivers/mtd/chips/cfi_cmdset_0002.c  52
-rw-r--r--  drivers/mtd/chips/cfi_probe.c  58
-rw-r--r--  drivers/mtd/chips/cfi_util.c  66
-rw-r--r--  drivers/mtd/chips/gen_probe.c  2
-rw-r--r--  drivers/mtd/cmdlinepart.c  1
-rw-r--r--  drivers/mtd/devices/Kconfig  21
-rw-r--r--  drivers/mtd/devices/m25p80.c  138
-rw-r--r--  drivers/mtd/devices/mtd_dataflash.c  214
-rw-r--r--  drivers/mtd/inftlcore.c  5
-rw-r--r--  drivers/mtd/maps/Kconfig  33
-rw-r--r--  drivers/mtd/maps/Makefile  4
-rw-r--r--  drivers/mtd/maps/ebony.c  163
-rw-r--r--  drivers/mtd/maps/ocotea.c  154
-rw-r--r--  drivers/mtd/maps/omap-toto-flash.c  133
-rw-r--r--  drivers/mtd/maps/pci.c  18
-rw-r--r--  drivers/mtd/maps/physmap_of.c  3
-rw-r--r--  drivers/mtd/maps/walnut.c  122
-rw-r--r--  drivers/mtd/mtdchar.c  4
-rw-r--r--  drivers/mtd/mtdconcat.c  4
-rw-r--r--  drivers/mtd/mtdoops.c  42
-rw-r--r--  drivers/mtd/mtdpart.c  4
-rw-r--r--  drivers/mtd/nand/Kconfig  42
-rw-r--r--  drivers/mtd/nand/Makefile  4
-rw-r--r--  drivers/mtd/nand/atmel_nand.c  58
-rw-r--r--  drivers/mtd/nand/cs553x_nand.c  2
-rw-r--r--  drivers/mtd/nand/fsl_elbc_nand.c  3
-rw-r--r--  drivers/mtd/nand/fsl_upm.c  68
-rw-r--r--  drivers/mtd/nand/gpio.c  375
-rw-r--r--  drivers/mtd/nand/mxc_nand.c  1077
-rw-r--r--  drivers/mtd/nand/nand_base.c  16
-rw-r--r--  drivers/mtd/nand/nand_ecc.c  554
-rw-r--r--  drivers/mtd/nand/nandsim.c  1
-rw-r--r--  drivers/mtd/nand/pxa3xx_nand.c  147
-rw-r--r--  drivers/mtd/nand/sh_flctl.c  878
-rw-r--r--  drivers/mtd/nand/toto.c  206
-rw-r--r--  drivers/mtd/ofpart.c  1
-rw-r--r--  drivers/mtd/onenand/Kconfig  8
-rw-r--r--  drivers/mtd/onenand/Makefile  1
-rw-r--r--  drivers/mtd/onenand/omap2.c  802
-rw-r--r--  drivers/mtd/onenand/onenand_base.c  2
-rw-r--r--  drivers/mtd/ssfdc.c  3
-rw-r--r--  drivers/mtd/ubi/cdev.c  6
-rw-r--r--  drivers/mtd/ubi/scan.c  2
-rw-r--r--  drivers/mtd/ubi/vtbl.c  4
-rw-r--r--  drivers/pci/intel-iommu.c  4
-rw-r--r--  drivers/pci/pci.c  5
-rw-r--r--  drivers/pci/probe.c  28
-rw-r--r--  drivers/pci/rom.c  6
-rw-r--r--  drivers/pci/setup-bus.c  13
-rw-r--r--  drivers/pci/setup-res.c  40
-rw-r--r--  drivers/power/power_supply_sysfs.c  2
-rw-r--r--  drivers/ps3/ps3av.c  16
-rw-r--r--  drivers/ps3/ps3av_cmd.c  19
-rw-r--r--  drivers/rtc/rtc-at91rm9200.c  42
-rw-r--r--  drivers/rtc/rtc-bq4802.c  30
-rw-r--r--  drivers/rtc/rtc-cmos.c  91
-rw-r--r--  drivers/rtc/rtc-ds1216.c  26
-rw-r--r--  drivers/rtc/rtc-ds1302.c  28
-rw-r--r--  drivers/rtc/rtc-ds1305.c  39
-rw-r--r--  drivers/rtc/rtc-ds1307.c  41
-rw-r--r--  drivers/rtc/rtc-ds1511.c  43
-rw-r--r--  drivers/rtc/rtc-ds1553.c  38
-rw-r--r--  drivers/rtc/rtc-ds1742.c  30
-rw-r--r--  drivers/rtc/rtc-fm3130.c  56
-rw-r--r--  drivers/rtc/rtc-isl1208.c  42
-rw-r--r--  drivers/rtc/rtc-m41t80.c  44
-rw-r--r--  drivers/rtc/rtc-m41t94.c  28
-rw-r--r--  drivers/rtc/rtc-m48t59.c  49
-rw-r--r--  drivers/rtc/rtc-m48t86.c  28
-rw-r--r--  drivers/rtc/rtc-max6900.c  32
-rw-r--r--  drivers/rtc/rtc-max6902.c  32
-rw-r--r--  drivers/rtc/rtc-omap.c  24
-rw-r--r--  drivers/rtc/rtc-pcf8563.c  24
-rw-r--r--  drivers/rtc/rtc-pcf8583.c  20
-rw-r--r--  drivers/rtc/rtc-r9701.c  24
-rw-r--r--  drivers/rtc/rtc-rs5c313.c  28
-rw-r--r--  drivers/rtc/rtc-rs5c348.c  30
-rw-r--r--  drivers/rtc/rtc-rs5c372.c  42
-rw-r--r--  drivers/rtc/rtc-s35390a.c  34
-rw-r--r--  drivers/rtc/rtc-s3c.c  42
-rw-r--r--  drivers/rtc/rtc-sh.c  40
-rw-r--r--  drivers/rtc/rtc-stk17ta8.c  39
-rw-r--r--  drivers/rtc/rtc-v3020.c  28
-rw-r--r--  drivers/rtc/rtc-x1205.c  30
-rw-r--r--  drivers/scsi/arcmsr/arcmsr_attr.c  3
-rw-r--r--  drivers/scsi/sr_vendor.c  12
-rw-r--r--  drivers/serial/8250_gsc.c  2
-rw-r--r--  drivers/serial/serial_txx9.c  2
-rw-r--r--  drivers/serial/sn_console.c  2
-rw-r--r--  drivers/staging/go7007/Kconfig  2
-rw-r--r--  drivers/staging/sxg/Kconfig  1
-rw-r--r--  drivers/telephony/phonedev.c  2
-rw-r--r--  drivers/video/fbmem.c  174
-rw-r--r--  drivers/w1/slaves/w1_ds2760.c  1
-rw-r--r--  fs/Kconfig  333
-rw-r--r--  fs/Kconfig.binfmt  22
-rw-r--r--  fs/binfmt_elf.c  12
-rw-r--r--  fs/buffer.c  3
-rw-r--r--  fs/cifs/Kconfig  142
-rw-r--r--  fs/cifs/file.c  4
-rw-r--r--  fs/exec.c  7
-rw-r--r--  fs/ext3/balloc.c  3
-rw-r--r--  fs/ext3/dir.c  30
-rw-r--r--  fs/ext3/inode.c  7
-rw-r--r--  fs/ext3/resize.c  3
-rw-r--r--  fs/ext3/super.c  16
-rw-r--r--  fs/hfsplus/extents.c  3
-rw-r--r--  fs/hfsplus/inode.c  2
-rw-r--r--  fs/jbd/commit.c  10
-rw-r--r--  fs/jbd/transaction.c  16
-rw-r--r--  fs/jffs2/Kconfig  188
-rw-r--r--  fs/jffs2/compr.c  4
-rw-r--r--  fs/jffs2/dir.c  2
-rw-r--r--  fs/jffs2/erase.c  4
-rw-r--r--  fs/jffs2/fs.c  6
-rw-r--r--  fs/jffs2/nodemgmt.c  4
-rw-r--r--  fs/jffs2/wbuf.c  5
-rw-r--r--  fs/nfs/dir.c  2
-rw-r--r--  fs/ntfs/file.c  4
-rw-r--r--  fs/proc/proc_misc.c  85
-rw-r--r--  fs/proc/vmcore.c  5
-rw-r--r--  fs/ramfs/file-nommu.c  4
-rw-r--r--  fs/ramfs/inode.c  1
-rw-r--r--  fs/seq_file.c  29
-rw-r--r--  include/asm-cris/thread_info.h  2
-rw-r--r--  include/asm-generic/rtc.h  24
-rw-r--r--  include/asm-m68k/thread_info.h  1
-rw-r--r--  include/asm-parisc/thread_info.h  2
-rw-r--r--  include/asm-um/thread_info.h  2
-rw-r--r--  include/asm-xtensa/thread_info.h  2
-rw-r--r--  include/linux/Kbuild  2
-rw-r--r--  include/linux/backing-dev.h  13
-rw-r--r--  include/linux/bcd.h  16
-rw-r--r--  include/linux/bitmap.h  1
-rw-r--r--  include/linux/buffer_head.h  2
-rw-r--r--  include/linux/byteorder/Kbuild  1
-rw-r--r--  include/linux/byteorder/big_endian.h  1
-rw-r--r--  include/linux/byteorder/little_endian.h  1
-rw-r--r--  include/linux/cgroup.h  28
-rw-r--r--  include/linux/cgroup_subsys.h  6
-rw-r--r--  include/linux/crash_dump.h  38
-rw-r--r--  include/linux/ext3_fs.h  2
-rw-r--r--  include/linux/fb.h  1
-rw-r--r--  include/linux/freezer.h  43
-rw-r--r--  include/linux/jbd.h  3
-rw-r--r--  include/linux/memcontrol.h  34
-rw-r--r--  include/linux/migrate.h  3
-rw-r--r--  include/linux/mm.h  9
-rw-r--r--  include/linux/mm_inline.h  98
-rw-r--r--  include/linux/mm_types.h  3
-rw-r--r--  include/linux/mmzone.h  105
-rw-r--r--  include/linux/mtd/cfi.h  9
-rw-r--r--  include/linux/mtd/flashchip.h  4
-rw-r--r--  include/linux/mtd/mtd.h  4
-rw-r--r--  include/linux/mtd/nand-gpio.h  19
-rw-r--r--  include/linux/mtd/nand.h  1
-rw-r--r--  include/linux/mtd/onenand_regs.h  2
-rw-r--r--  include/linux/mtd/partitions.h  1
-rw-r--r--  include/linux/mtd/sh_flctl.h  125
-rw-r--r--  include/linux/page-flags.h  55
-rw-r--r--  include/linux/page_cgroup.h  103
-rw-r--r--  include/linux/pagemap.h  44
-rw-r--r--  include/linux/pagevec.h  34
-rw-r--r--  include/linux/pci.h  2
-rw-r--r--  include/linux/ptrace.h  1
-rw-r--r--  include/linux/rmap.h  29
-rw-r--r--  include/linux/sched.h  13
-rw-r--r--  include/linux/seq_file.h  13
-rw-r--r--  include/linux/swab.h  10
-rw-r--r--  include/linux/swap.h  69
-rw-r--r--  include/linux/sysfs.h  5
-rw-r--r--  include/linux/vmalloc.h  15
-rw-r--r--  include/linux/vmstat.h  20
-rw-r--r--  include/net/netns/x_tables.h  4
-rw-r--r--  init/Kconfig  7
-rw-r--r--  init/main.c  2
-rw-r--r--  ipc/mqueue.c  20
-rw-r--r--  ipc/shm.c  4
-rw-r--r--  kernel/Kconfig.freezer  2
-rw-r--r--  kernel/Makefile  2
-rw-r--r--  kernel/cgroup.c  265
-rw-r--r--  kernel/cgroup_debug.c  4
-rw-r--r--  kernel/cgroup_freezer.c  379
-rw-r--r--  kernel/configs.c  9
-rw-r--r--  kernel/cpuset.c  17
-rw-r--r--  kernel/freezer.c  154
-rw-r--r--  kernel/kexec.c  2
-rw-r--r--  kernel/kthread.c  5
-rw-r--r--  kernel/power/process.c  119
-rw-r--r--  kernel/ptrace.c  2
-rw-r--r--  kernel/rcupreempt.c  2
-rw-r--r--  kernel/sysctl.c  10
-rw-r--r--  lib/bitmap.c  11
-rw-r--r--  lib/vsprintf.c  49
-rw-r--r--  mm/Kconfig  11
-rw-r--r--  mm/Makefile  3
-rw-r--r--  mm/filemap.c  37
-rw-r--r--  mm/fremap.c  27
-rw-r--r--  mm/hugetlb.c  44
-rw-r--r--  mm/internal.h  131
-rw-r--r--  mm/memcontrol.c  466
-rw-r--r--  mm/memory.c  127
-rw-r--r--  mm/memory_hotplug.c  19
-rw-r--r--  mm/mempolicy.c  11
-rw-r--r--  mm/migrate.c  274
-rw-r--r--  mm/mlock.c  443
-rw-r--r--  mm/mmap.c  81
-rw-r--r--  mm/mremap.c  8
-rw-r--r--  mm/nommu.c  44
-rw-r--r--  mm/page-writeback.c  8
-rw-r--r--  mm/page_alloc.c  121
-rw-r--r--  mm/page_cgroup.c  237
-rw-r--r--  mm/readahead.c  2
-rw-r--r--  mm/rmap.c  319
-rw-r--r--  mm/shmem.c  7
-rw-r--r--  mm/swap.c  172
-rw-r--r--  mm/swap_state.c  11
-rw-r--r--  mm/swapfile.c  27
-rw-r--r--  mm/truncate.c  4
-rw-r--r--  mm/vmalloc.c  975
-rw-r--r--  mm/vmscan.c  1026
-rw-r--r--  mm/vmstat.c  33
-rw-r--r--  net/bridge/br_netfilter.c  2
-rw-r--r--  net/core/dev.c  6
-rw-r--r--  net/dccp/ipv6.c  4
-rw-r--r--  net/dccp/minisocks.c  1
-rw-r--r--  net/dccp/output.c  2
-rw-r--r--  net/ipv4/arp.c  4
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c  1
-rw-r--r--  net/ipv6/syncookies.c  1
-rw-r--r--  net/ipv6/tcp_ipv6.c  6
-rw-r--r--  net/netfilter/Kconfig  1
-rw-r--r--  net/netfilter/ipvs/Kconfig  4
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c  2
-rw-r--r--  net/netfilter/xt_NFQUEUE.c  2
-rw-r--r--  net/netfilter/xt_iprange.c  8
-rw-r--r--  net/netfilter/xt_recent.c  10
-rw-r--r--  net/sched/sch_generic.c  2
-rw-r--r--  security/device_cgroup.c  46
-rw-r--r--  sound/core/pcm_misc.c  1
-rw-r--r--  sound/drivers/dummy.c  2
-rw-r--r--  sound/pci/ca0106/ca0106_main.c  1
-rw-r--r--  sound/ppc/snd_ps3.c  96
-rw-r--r--  sound/ppc/snd_ps3.h  1
-rw-r--r--  sound/soc/omap/omap-mcbsp.c  24
352 files changed, 13209 insertions, 4409 deletions
diff --git a/.mailmap b/.mailmap
index dfab12f809e..eba9bf953ef 100644
--- a/.mailmap
+++ b/.mailmap
@@ -66,6 +66,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com>
Koushik <raghavendra.koushik@neterion.com>
Leonid I Ananiev <leonid.i.ananiev@intel.com>
Linas Vepstas <linas@austin.ibm.com>
+Mark Brown <broonie@sirena.org.uk>
Matthieu CASTET <castet.matthieu@free.fr>
Michael Buesch <mb@bu3sch.de>
Michael Buesch <mbuesch@freenet.de>
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups/cgroups.txt
index d9014aa0eb6..d9014aa0eb6 100644
--- a/Documentation/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt
new file mode 100644
index 00000000000..c50ab58b72e
--- /dev/null
+++ b/Documentation/cgroups/freezer-subsystem.txt
@@ -0,0 +1,99 @@
+ The cgroup freezer is useful to batch job management systems, which start
+and stop sets of tasks in order to schedule the resources of a machine
+according to the desires of a system administrator. This sort of program
+is often used on HPC clusters to schedule access to the cluster as a
+whole. The cgroup freezer uses cgroups to describe the set of tasks to
+be started/stopped by the batch job management system. It also provides
+a means to start and stop the tasks composing the job.
+
+ The cgroup freezer will also be useful for checkpointing running groups
+of tasks. The freezer allows the checkpoint code to obtain a consistent
+image of the tasks by attempting to force the tasks in a cgroup into a
+quiescent state. Once the tasks are quiescent another task can
+walk /proc or invoke a kernel interface to gather information about the
+quiesced tasks. Checkpointed tasks can be restarted later should a
+recoverable error occur. This also allows the checkpointed tasks to be
+migrated between nodes in a cluster by copying the gathered information
+to another node and restarting the tasks there.
+
+ Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
+and resuming tasks in userspace. Both of these signals are observable
+from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
+blocked, or ignored it can be seen by waiting or ptracing parent tasks.
+SIGCONT is especially unsuitable since it can be caught by the task. Any
+programs designed to watch for SIGSTOP and SIGCONT could be broken by
+attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
+demonstrate this problem using nested bash shells:
+
+ $ echo $$
+ 16644
+ $ bash
+ $ echo $$
+ 16690
+
+ From a second, unrelated bash shell:
+ $ kill -SIGSTOP 16690
+ $ kill -SIGCONT 16690
+
+ <at this point 16690 exits and causes 16644 to exit too>
+
+ This happens because bash can observe both signals and choose how it
+responds to them.
+
+ Another example of a program which catches and responds to these
+signals is gdb. In fact any program designed to use ptrace is likely to
+have a problem with this method of stopping and resuming tasks.
+
+ In contrast, the cgroup freezer uses the kernel freezer code to
+prevent the freeze/unfreeze cycle from becoming visible to the tasks
+being frozen. This allows the bash example above and gdb to run as
+expected.
+
+ The freezer subsystem in the container filesystem defines a file named
+freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the
+cgroup. Subsequently writing "THAWED" will unfreeze the tasks in the cgroup.
+Reading will return the current state.
+
+* Examples of usage:
+
+ # mkdir /containers/freezer
+ # mount -t cgroup -ofreezer freezer /containers
+ # mkdir /containers/0
+ # echo $some_pid > /containers/0/tasks
+
+to get the status of the freezer subsystem:
+
+ # cat /containers/0/freezer.state
+ THAWED
+
+to freeze all tasks in the container:
+
+ # echo FROZEN > /containers/0/freezer.state
+ # cat /containers/0/freezer.state
+ FREEZING
+ # cat /containers/0/freezer.state
+ FROZEN
+
+to unfreeze all tasks in the container:
+
+ # echo THAWED > /containers/0/freezer.state
+ # cat /containers/0/freezer.state
+ THAWED
+
+This is the basic mechanism which should do the right thing for user space tasks
+in a simple scenario.
+
+It's important to note that freezing can be incomplete. In that case we return
+EBUSY. This means that some tasks in the cgroup are busy doing something that
+prevents us from completely freezing the cgroup at this time. After EBUSY,
+the cgroup will remain partially frozen -- reflected by freezer.state reporting
+"FREEZING" when read. The state will remain "FREEZING" until one of these
+things happens:
+
+ 1) Userspace cancels the freezing operation by writing "THAWED" to
+ the freezer.state file
+ 2) Userspace retries the freezing operation by writing "FROZEN" to
+ the freezer.state file (writing "FREEZING" is not legal
+ and returns EIO)
+ 3) The tasks that blocked the cgroup from entering the "FROZEN"
+ state disappear from the cgroup's set of tasks.
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 9b53d582736..1c07547d3f8 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -112,14 +112,22 @@ the per cgroup LRU.
2.2.1 Accounting details
-All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted.
-RSS pages are accounted at the time of page_add_*_rmap() unless they've already
-been accounted for earlier. A file page will be accounted for as Page Cache;
-it's mapped into the page tables of a process, duplicate accounting is carefully
-avoided. Page Cache pages are accounted at the time of add_to_page_cache().
-The corresponding routines that remove a page from the page tables or removes
-a page from Page Cache is used to decrement the accounting counters of the
-cgroup.
+All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
+(Some pages which can never be reclaimed, and which will never be on the
+global LRU, are not accounted: we only account pages under usual VM
+management.)
+
+RSS pages are accounted at page_fault unless they've already been accounted
+for earlier. A file page will be accounted for as Page Cache when it's
+inserted into the inode's radix-tree. While it's mapped into the page tables of
+processes, duplicate accounting is carefully avoided.
+
+An RSS page is unaccounted when it's fully unmapped. A PageCache page is
+unaccounted when it's removed from radix-tree.
+
+At page migration, accounting information is kept.
+
+Note: we only account pages on the LRU, because our purpose is to control the
+amount of used pages; pages not on the LRU tend to be out of the VM's control.
2.3 Shared Page Accounting
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 47e568a9370..5c86c258c79 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -48,7 +48,7 @@ hooks, beyond what is already present, required to manage dynamic
job placement on large systems.
Cpusets use the generic cgroup subsystem described in
-Documentation/cgroup.txt.
+Documentation/cgroups/cgroups.txt.
Requests by a task, using the sched_setaffinity(2) system call to
include CPUs in its CPU affinity mask, and using the mbind(2) and
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 295f26cd895..9dd2a3bb2ac 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -96,6 +96,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error.
errors=continue Keep going on a filesystem error.
errors=panic Panic and halt the machine if an error occurs.
+data_err=ignore(*) Just print an error message if an error occurs
+ in a file data buffer in ordered mode.
+data_err=abort Abort the journal if an error occurs in a file
+ data buffer in ordered mode.
+
grpid Give objects the same group ID as their creator.
bsdgroups
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index c032bf39e8b..bcceb99b81d 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1384,15 +1384,18 @@ causes the kernel to prefer to reclaim dentries and inodes.
dirty_background_ratio
----------------------
-Contains, as a percentage of total system memory, the number of pages at which
-the pdflush background writeback daemon will start writing out dirty data.
+Contains, as a percentage of the dirtyable system memory (free pages + mapped
+pages + file cache, not including locked pages and HugePages), the number of
+pages at which the pdflush background writeback daemon will start writing out
+dirty data.
dirty_ratio
-----------------
-Contains, as a percentage of total system memory, the number of pages at which
-a process which is generating disk writes will itself start writing out dirty
-data.
+Contains, as a percentage of the dirtyable system memory (free pages + mapped
+pages + file cache, not including locked pages and HugePages), the number of
+pages at which a process which is generating disk writes will itself start
+writing out dirty data.
dirty_writeback_centisecs
-------------------------
@@ -2412,24 +2415,29 @@ will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
of memory types. If a bit of the bitmask is set, memory segments of the
corresponding memory type are dumped, otherwise they are not dumped.
-The following 4 memory types are supported:
+The following 7 memory types are supported:
- (bit 0) anonymous private memory
- (bit 1) anonymous shared memory
- (bit 2) file-backed private memory
- (bit 3) file-backed shared memory
- (bit 4) ELF header pages in file-backed private memory areas (it is
effective only if the bit 2 is cleared)
+ - (bit 5) hugetlb private memory
+ - (bit 6) hugetlb shared memory
Note that MMIO pages such as frame buffer are never dumped and vDSO pages
are always dumped regardless of the bitmask status.
-Default value of coredump_filter is 0x3; this means all anonymous memory
-segments are dumped.
+ Note that bits 0-4 don't affect hugetlb memory; hugetlb memory is only
+ affected by bits 5-6.
+
+Default value of coredump_filter is 0x23; this means all anonymous memory
+segments and hugetlb private memory are dumped.
If you don't want to dump all shared memory segments attached to pid 1234,
-write 1 to the process's proc file.
+write 0x21 to the process's proc file.
- $ echo 0x1 > /proc/1234/coredump_filter
+ $ echo 0x21 > /proc/1234/coredump_filter
When a new process is created, the process inherits the bitmask status from its
parent. It is useful to set up coredump_filter before the program runs.
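
As an example, a small launcher can set the filter before running the real
program, since the value is inherited across fork() and execve(). A minimal
sketch (the value 0x23 and the launcher itself are illustrations, not part
of the kernel interface):

    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char *argv[])
    {
        FILE *f;

        if (argc < 2)
            return 1;
        f = fopen("/proc/self/coredump_filter", "w");
        if (f) {
            fprintf(f, "0x23"); /* anon memory + hugetlb private */
            fclose(f);
        }
        /* the exec'ed program inherits the bitmask */
        execvp(argv[1], &argv[1]);
        return 1;
    }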
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index bcecfaa1e77..0f1544f6740 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -690,7 +690,7 @@ and is between 256 and 4096 characters. It is defined in the file
See Documentation/block/as-iosched.txt and
Documentation/block/deadline-iosched.txt for details.
- elfcorehdr= [X86-32, X86_64]
+ elfcorehdr= [IA64,PPC,SH,X86-32,X86_64]
Specifies physical address of start of kernel core
image elf header. Generally kexec loader will
pass this option to capture kernel.
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
new file mode 100644
index 00000000000..bdf93b7f0f2
--- /dev/null
+++ b/Documentation/mtd/nand_ecc.txt
@@ -0,0 +1,714 @@
+Introduction
+============
+
+Having looked at the linux mtd/nand driver and more specifically at nand_ecc.c
+I felt there was room for optimisation. I bashed the code for a few hours,
+performing tricks like table lookup, removing superfluous code, etc.
+After that the speed was increased by 35-40%.
+Still I was not too happy as I felt there was additional room for improvement.
+
+Bad! I was hooked.
+I decided to annotate my steps in this file. Perhaps it is useful to someone,
+or perhaps someone will learn something from it.
+
+
+The problem
+===========
+
+NAND flash (at least the SLC variety) typically has sectors of 256 bytes.
+However NAND flash is not extremely reliable so some error detection
+(and sometimes correction) is needed.
+
+This is done by means of a Hamming code. I'll try to explain it in
+laymans terms (and apologies to all the pro's in the field in case I do
+not use the right terminology, my coding theory class was almost 30
+years ago, and I must admit it was not one of my favourites).
+
+As I said before the ecc calculation is performed on sectors of 256
+bytes. This is done by calculating several parity bits over the rows and
+columns. The parity used is even parity, which means that the parity bit is
+1 if the number of 1 bits in the data over which the parity is calculated
+is odd, and 0 if that number is even. So the total number of 1 bits in the
+data over which the parity is calculated, plus the parity bit itself, is
+even. (See Wikipedia if you can't follow this.)
+Parity is often calculated by means of an exclusive or operation,
+sometimes also referred to as xor. In C the operator for xor is ^
+
+Back to ecc.
+Let's give a small figure:
+
+byte 0: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp4 ... rp14
+byte 1: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp2 rp4 ... rp14
+byte 2: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp4 ... rp14
+byte 3: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp4 ... rp14
+byte 4: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp5 ... rp14
+....
+byte 254: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp5 ... rp15
+byte 255: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp5 ... rp15
+ cp1 cp0 cp1 cp0 cp1 cp0 cp1 cp0
+ cp3 cp3 cp2 cp2 cp3 cp3 cp2 cp2
+ cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4
+
+This figure represents a sector of 256 bytes.
+cp is my abbreviation for column parity, rp for row parity.
+
+Let's start to explain column parity.
+cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
+so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
+Similarly cp1 is the parity over all bit1, bit3, bit5 and bit7.
+cp2 is the parity over bit0, bit1, bit4 and bit5
+cp3 is the parity over bit2, bit3, bit6 and bit7.
+cp4 is the parity over bit0, bit1, bit2 and bit3.
+cp5 is the parity over bit4, bit5, bit6 and bit7.
+Note that each of cp0 .. cp5 is exactly one bit.
+
+Row parity actually works almost the same.
+rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
+rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
+rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
+(so handle two bytes, then skip 2 bytes).
+rp3 covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
+for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
+so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...
+and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
+The story now becomes quite boring. I guess you get the idea.
+rp6 covers 8 bytes then skips 8 etc
+rp7 skips 8 bytes then covers 8 etc
+rp8 covers 16 bytes then skips 16 etc
+rp9 skips 16 bytes then covers 16 etc
+rp10 covers 32 bytes then skips 32 etc
+rp11 skips 32 bytes then covers 32 etc
+rp12 covers 64 bytes then skips 64 etc
+rp13 skips 64 bytes then covers 64 etc
+rp14 covers 128 bytes then skips 128
+rp15 skips 128 bytes then covers 128
+
+In the end the parity bits are grouped together in three bytes as
+follows:
+ECC Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
+ECC 0 rp07 rp06 rp05 rp04 rp03 rp02 rp01 rp00
+ECC 1 rp15 rp14 rp13 rp12 rp11 rp10 rp09 rp08
+ECC 2 cp5 cp4 cp3 cp2 cp1 cp0 1 1
+
+I detected after writing this that ST application note AN1823
+(http://www.st.com/stonline/books/pdf/docs/10123.pdf) gives a much
+nicer picture (but they use the term line parity where I use row parity).
+Oh well, I'm graphically challenged, so suffer with me for a moment :-)
+And I could not reuse the ST picture anyway for copyright reasons.
+
+
+Attempt 0
+=========
+
+Implementing the parity calculation is pretty simple.
+In C pseudocode:
+for (i = 0; i < 256; i++)
+{
+ if (i & 0x01)
+ rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
+ else
+ rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0;
+ if (i & 0x02)
+ rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
+ else
+ rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2;
+ if (i & 0x04)
+ rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5;
+ else
+ rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4;
+ if (i & 0x08)
+ rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7;
+ else
+ rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6;
+ if (i & 0x10)
+ rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9;
+ else
+ rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8;
+ if (i & 0x20)
+ rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
+ else
+ rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
+ if (i & 0x40)
+ rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
+ else
+ rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12;
+ if (i & 0x80)
+ rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15;
+ else
+ rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14;
+ cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0;
+ cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1;
+ cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2;
+ cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3;
+ cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4;
+ cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5;
+}
+
+
+Analysis 0
+==========
+
+C does have bitwise operators but not really operators to do the above
+efficiently (and most hardware has no such instructions either).
+Therefore without implementing this it was clear that the code above was
+not going to bring me a Nobel prize :-)
+
+Fortunately the exclusive or operation is commutative, so we can combine
+the values in any order. So instead of calculating all the bits
+individually, let us try to rearrange things.
+For the column parity this is easy. We can just xor the bytes and in the
+end filter out the relevant bits. This is pretty nice as it will bring
+all cp calculation out of the loop.
+
+Similarly we can first xor the bytes for the various rows.
+This leads to:
+
+
+Attempt 1
+=========
+
+const char parity[256] = {
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
+};
+
+void ecc1(const unsigned char *buf, unsigned char *code)
+{
+ int i;
+ const unsigned char *bp = buf;
+ unsigned char cur;
+ unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+ unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+ unsigned char par;
+
+ par = 0;
+ rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+ rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+ rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+ rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+ for (i = 0; i < 256; i++)
+ {
+ cur = *bp++;
+ par ^= cur;
+ if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
+ if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
+ if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
+ if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
+ if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
+ if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
+ if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
+ if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
+ }
+ code[0] =
+ (parity[rp7] << 7) |
+ (parity[rp6] << 6) |
+ (parity[rp5] << 5) |
+ (parity[rp4] << 4) |
+ (parity[rp3] << 3) |
+ (parity[rp2] << 2) |
+ (parity[rp1] << 1) |
+ (parity[rp0]);
+ code[1] =
+ (parity[rp15] << 7) |
+ (parity[rp14] << 6) |
+ (parity[rp13] << 5) |
+ (parity[rp12] << 4) |
+ (parity[rp11] << 3) |
+ (parity[rp10] << 2) |
+ (parity[rp9] << 1) |
+ (parity[rp8]);
+ code[2] =
+ (parity[par & 0xf0] << 7) |
+ (parity[par & 0x0f] << 6) |
+ (parity[par & 0xcc] << 5) |
+ (parity[par & 0x33] << 4) |
+ (parity[par & 0xaa] << 3) |
+ (parity[par & 0x55] << 2);
+ code[0] = ~code[0];
+ code[1] = ~code[1];
+ code[2] = ~code[2];
+}
+
+Still pretty straightforward. The last three invert statements are there to
+give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
+all data is 0xff, so the checksum then matches.
+
+I also introduced the parity lookup. I expected this to be the fastest
+way to calculate the parity, but I will investigate alternatives later
+on.
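+
+(As an aside, the 256-entry parity table need not be typed in by hand; it
+could be generated once with something like the following. This generator
+is just an illustration, not part of the driver.)
+
+    static char parity[256];
+
+    static void init_parity(void)
+    {
+        int i, b;
+
+        for (i = 0; i < 256; i++) {
+            b = i ^ (i >> 4);   /* fold the byte onto itself */
+            b ^= b >> 2;
+            b ^= b >> 1;
+            parity[i] = b & 1;
+        }
+    }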
+
+
+Analysis 1
+==========
+
+The code works, but is not terribly efficient. On my system it took
+almost 4 times as much time as the linux driver code. But hey, if it was
+*that* easy this would have been done long before.
+No pain, no gain.
+
+Fortunately there is plenty of room for improvement.
+
+In step 1 we moved from bit-wise calculation to byte-wise calculation.
+However in C we can also use the unsigned long data type and virtually
+every modern microprocessor supports 32 bit operations, so why not try
+to write our code in such a way that we process data in 32 bit chunks.
+
+Of course this means some modification as the row parity is byte by
+byte. A quick analysis:
+for the column parity we use the par variable. When extending to 32 bits
+we can in the end easily calculate rp0 and rp1 from it.
+(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
+respectively)
+also rp2 and rp3 can be easily retrieved from par as rp3 covers the
+first two bytes and rp2 the last two bytes.
+
+Note that of course now the loop is executed only 64 times (256/4).
+And note that care must be taken wrt byte ordering. The way bytes are
+ordered in a long is machine dependent, and might affect us.
+Anyway, if there is an issue: this code is developed on x86 (to be
+precise: a DELL PC with a D920 Intel CPU)
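+
+A quick run-time probe, should byte order ever become relevant here (just
+for experimenting, not something the driver needs):
+
+    unsigned long one = 1;
+    int little_endian = *(unsigned char *)&one; /* 1 on x86 */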
+
+And of course the performance might depend on alignment, but I expect
+that the I/O buffers in the nand driver are aligned properly (and
+otherwise that should be fixed to get maximum performance).
+
+Let's give it a try...
+
+
+Attempt 2
+=========
+
+extern const char parity[256];
+
+void ecc2(const unsigned char *buf, unsigned char *code)
+{
+ int i;
+ const unsigned long *bp = (unsigned long *)buf;
+ unsigned long cur;
+ unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+ unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+ unsigned long par;
+
+ par = 0;
+ rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+ rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+ rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+ rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+ for (i = 0; i < 64; i++)
+ {
+ cur = *bp++;
+ par ^= cur;
+ if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+ if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+ if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+ if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+ if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+ if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+ }
+ /*
+ we need to adapt the code generation for the fact that rp vars are now
+ long; also the column parity calculation needs to be changed.
+ we'll bring rp4 to 15 back to single byte entities by shifting and
+ xoring
+ */
+ rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
+ rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
+ rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
+ rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
+ rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
+ rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
+ rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
+ rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
+ rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
+ rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
+ rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
+ rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
+ rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
+ rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
+ par ^= (par >> 16);
+ rp1 = (par >> 8); rp1 &= 0xff;
+ rp0 = (par & 0xff);
+ par ^= (par >> 8); par &= 0xff;
+
+ code[0] =
+ (parity[rp7] << 7) |
+ (parity[rp6] << 6) |
+ (parity[rp5] << 5) |
+ (parity[rp4] << 4) |
+ (parity[rp3] << 3) |
+ (parity[rp2] << 2) |
+ (parity[rp1] << 1) |
+ (parity[rp0]);
+ code[1] =
+ (parity[rp15] << 7) |
+ (parity[rp14] << 6) |
+ (parity[rp13] << 5) |
+ (parity[rp12] << 4) |
+ (parity[rp11] << 3) |
+ (parity[rp10] << 2) |
+ (parity[rp9] << 1) |
+ (parity[rp8]);
+ code[2] =
+ (parity[par & 0xf0] << 7) |
+ (parity[par & 0x0f] << 6) |
+ (parity[par & 0xcc] << 5) |
+ (parity[par & 0x33] << 4) |
+ (parity[par & 0xaa] << 3) |
+ (parity[par & 0x55] << 2);
+ code[0] = ~code[0];
+ code[1] = ~code[1];
+ code[2] = ~code[2];
+}
+
+The parity array is not shown any more. Note also that for these
+examples I kinda deviated from my regular programming style by allowing
+multiple statements on a line, not using { } in then and else blocks
+with only a single statement and by using operators like ^=
+
+
+Analysis 2
+==========
+
+The code (of course) works, and hurray: we are a little bit faster than
+the linux driver code (about 15%). But wait, don't cheer too quickly.
+There is more to be gained.
+If we look at e.g. rp14 and rp15 we see that we either xor our data with
+rp14 or with rp15. However we also have par which goes over all data.
+This means there is no need to calculate rp14 as it can be calculated from
+rp15 through rp14 = par ^ rp15;
+(or if desired we can avoid calculating rp15 and calculate it from
+rp14). That is why some places refer to inverse parity.
+Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
+Effectively this means we can eliminate the else clause from the if
+statements. Also we can optimise the calculation in the end a little bit
+by going from long to byte first. Actually we can even avoid the table
+lookups.
+
+Attempt 3
+=========
+
+Odd replaced:
+ if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+ if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+ if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+ if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+ if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+ if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+with
+ if (i & 0x01) rp5 ^= cur;
+ if (i & 0x02) rp7 ^= cur;
+ if (i & 0x04) rp9 ^= cur;
+ if (i & 0x08) rp11 ^= cur;
+ if (i & 0x10) rp13 ^= cur;
+ if (i & 0x20) rp15 ^= cur;
+
+ and outside the loop added:
+ rp4 = par ^ rp5;
+ rp6 = par ^ rp7;
+ rp8 = par ^ rp9;
+ rp10 = par ^ rp11;
+ rp12 = par ^ rp13;
+ rp14 = par ^ rp15;
+
+And after that the code takes about 30% more time, although the number of
+statements is reduced. This is also reflected in the assembly code.
+
+
+Analysis 3
+==========
+
+Very weird. Guess it has to do with caching or instruction parallelism
+or so. I also tried on an eeePC (Celeron, clocked at 900 MHz). An interesting
+observation was that this one was only 30% slower (according to time) at
+executing the code than my 3 GHz D920 processor.
+
+Well, it was expected not to be easy so maybe instead move to a
+different track: let's move back to the code from attempt2 and do some
+loop unrolling. This will eliminate a few if statements. I'll try
+different amounts of unrolling to see what works best.
+
+
+Attempt 4
+=========
+
+Unrolled the loop 1, 2, 3 and 4 times.
+For 4 the code starts with:
+
+ for (i = 0; i < 4; i++)
+ {
+ cur = *bp++;
+ par ^= cur;
+ rp4 ^= cur;
+ rp6 ^= cur;
+ rp8 ^= cur;
+ rp10 ^= cur;
+ if (i & 0x1) rp13 ^= cur; else rp12 ^= cur;
+ if (i & 0x2) rp15 ^= cur; else rp14 ^= cur;
+ cur = *bp++;
+ par ^= cur;
+ rp5 ^= cur;
+ rp6 ^= cur;
+ ...
+
+
+Analysis 4
+==========
+
+Unrolling once gains about 15%
+Unrolling twice keeps the gain at about 15%
+Unrolling three times gives a gain of 30% compared to attempt 2.
+Unrolling four times gives a marginal improvement compared to unrolling
+three times.
+
+I decided to proceed with the loop unrolled four times anyway. It was my gut
+feeling that in the next steps I would obtain additional gain from it.
+
+The next step was triggered by the fact that par contains the xor of all
+bytes and rp4 and rp5 each contain the xor of half of the bytes.
+So in effect par = rp4 ^ rp5. But as xor is commutative we can also say
+that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can
+eliminate rp5 (or rp4, but I already foresaw another optimisation).
+The same holds for rp6/7, rp8/9, rp10/11, rp12/13 and rp14/15.
+
+
+Attempt 5
+=========
+
+Effectively all odd-numbered rp assignments in the loop were removed.
+This included the else clause of the if statements.
+Of course after the loop we need to correct things by adding code like:
+ rp5 = par ^ rp4;
+Also the initial assignments (rp5 = 0; etc) could be removed.
+Along the line I also removed the initialisation of rp0/1/2/3.
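+
+To illustrate, the body of the unrolled loop then starts out like this
+(my reconstruction of this intermediate version, not the exact code that
+was measured):
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++; par ^= cur;
+        rp4 ^= cur; rp6 ^= cur; rp8 ^= cur; rp10 ^= cur;
+        if ((i & 0x1) == 0) rp12 ^= cur;
+        if ((i & 0x2) == 0) rp14 ^= cur;
+        cur = *bp++; par ^= cur;  /* odd word: rp4 is skipped */
+        rp6 ^= cur; rp8 ^= cur; rp10 ^= cur;
+        if ((i & 0x1) == 0) rp12 ^= cur;
+        if ((i & 0x2) == 0) rp14 ^= cur;
+        ...
+    }
+    /* and after the loop the odd ones are reconstructed */
+    rp5 = par ^ rp4;
+    rp7 = par ^ rp6;
+    rp9 = par ^ rp8;
+    rp11 = par ^ rp10;
+    rp13 = par ^ rp12;
+    rp15 = par ^ rp14;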
+
+
+Analysis 5
+==========
+
+Measurements showed this was a good move. The run-time roughly halved
+compared with attempt 4 with 4 times unrolled, and we only require 1/3rd
+of the processor time compared to the current code in the linux kernel.
+
+However, still I thought there was more. I didn't like all the if
+statements. Why not keep a running parity and only keep the last if
+statement. Time for yet another version!
+
+
+Attempt 6
+=========
+
+The code within the for loop was changed to:
+
+ for (i = 0; i < 4; i++)
+ {
+ cur = *bp++; tmppar = cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp8 ^= cur;
+
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur;
+
+ par ^= tmppar;
+ if ((i & 0x1) == 0) rp12 ^= tmppar;
+ if ((i & 0x2) == 0) rp14 ^= tmppar;
+ }
+
+As you can see tmppar is used to accumulate the parity within a for
+iteration. In the last 3 statements it is added to par and, if needed,
+to rp12 and rp14.
+
+While making the changes I also found that I could exploit that tmppar
+contains the running parity for this iteration. So instead of having:
+rp4 ^= cur; rp6 ^= cur;
+I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on the next
+statement. A similar change was done for rp8 and rp10
+
+
+Analysis 6
+==========
+
+Measuring this code again showed big gain. When executing the original
+linux code 1 million times, this took about 1 second on my system.
+(using time to measure the performance). After this iteration I was back
+to 0.075 sec. Actually I had to decide to start measuring over 10
+million iterations in order not to lose too much accuracy. This one
+definitely seemed to be the jackpot!
+
+There is a little bit more room for improvement though. There are three
+places with statements:
+rp4 ^= cur; rp6 ^= cur;
+It seems more efficient to also maintain a variable rp4_6 in the for
+loop; this eliminates 3 statements per iteration. Of course after the loop we
+need to correct by adding:
+ rp4 ^= rp4_6;
+ rp6 ^= rp4_6
+Furthermore there are 4 sequential assignments to rp8. This can be
+encoded slightly more efficiently by saving tmppar before those 4 lines
+and later doing rp8 = rp8 ^ tmppar ^ notrp8;
+(where notrp8 is the value of rp8 before those 4 lines).
+Again a use of the commutative property of xor.
+Time for a new test!
+
+
+Attempt 7
+=========
+
+The new code now looks like:
+
+ for (i = 0; i < 4; i++)
+ {
+ cur = *bp++; tmppar = cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+ cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+ notrp8 = tmppar;
+ cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur;
+ rp8 = rp8 ^ tmppar ^ notrp8;
+
+ cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur;
+
+ par ^= tmppar;
+ if ((i & 0x1) == 0) rp12 ^= tmppar;
+ if ((i & 0x2) == 0) rp14 ^= tmppar;
+ }
+ rp4 ^= rp4_6;
+ rp6 ^= rp4_6;
+
+
+Not a big change, but every penny counts :-)
+
+
+Analysis 7
+==========
+
+Actually this made things worse. Not very much, but I don't want to move
+into the wrong direction. Maybe something to investigate later. Could
+have to do with caching again.
+
+I guess that is all there is to gain within the loop. Maybe unrolling one
+more time will help. I'll keep the optimisations from 7 for now.
+
+
+Attempt 8
+=========
+
+Unrolled the loop one more time.
+
+
+Analysis 8
+==========
+
+This makes things worse. Let's stick with attempt 6 and continue from there.
+Although it seems that the code within the loop cannot be optimised
+further, there is still room to optimise the generation of the ecc codes.
+We can simply calculate the total parity. If this is 0 then rp4 = rp5
+etc. If the parity is 1, then rp4 = !rp5;
+But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
+in the result byte and then do something like
+ code[0] |= (code[0] << 1);
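+Fleshed out a little, that could look like the following sketch (my
+reconstruction of the idea; the twist is that when the total parity is
+odd, the odd-numbered bits are the inverse of the even ones):
+
+    /* even-numbered row parity bits only */
+    code[0] = (parity[rp6] << 6) | (parity[rp4] << 4) |
+              (parity[rp2] << 2) | parity[rp0];
+    if (parity[par])
+        code[0] |= (~code[0] << 1) & 0xaa; /* odd bits are inverted */
+    else
+        code[0] |= (code[0] << 1) & 0xaa;  /* odd bits equal even bits */
+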
+Let's test this.
+
+
+Attempt 9
+=========
+
+Changed the code but again this slightly degraded performance. Tried all
+kinds of other things, like having dedicated parity arrays to avoid the
+shift after parity[rp7] << 7; No gain.
+Change the lookup using the parity array by using shift operators (e.g.
+replace parity[rp7] << 7 with:
+rp7 ^= (rp7 << 4);
+rp7 ^= (rp7 << 2);
+rp7 ^= (rp7 << 1);
+rp7 &= 0x80;
+No gain.
+
+The only marginal change was inverting the parity bits, so we can remove
+the last three invert statements.
+
+Ah well, pity this does not deliver more. Then again 10 million
+iterations using the linux driver code takes between 13 and 13.5
+seconds, whereas my code now takes about 0.73 seconds for those 10
+million iterations. So basically I've improved the performance by a
+factor 18 on my system. Not that bad. Of course on different hardware
+you will get different results. No warranties!
+
+But of course there is no such thing as a free lunch. The codesize almost
+tripled (from 562 bytes to 1434 bytes). Then again, it is not that much.
+
+
+Correcting errors
+=================
+
+For correcting errors I again used the ST application note as a starter,
+but I also peeked at the existing code.
+The algorithm itself is pretty straightforward. Just xor the given and
+the calculated ecc. If all bytes are 0 there is no problem. If 11 bits
+are 1 we have one correctable bit error. If there is 1 bit 1, we have an
+error in the given ecc code.
+It proved to be fastest to do some table lookups. Performance gain
+introduced by this is about a factor 2 on my system when a repair had to
+be done, and 1% or so if no repair had to be done.
+Code size increased from 330 bytes to 686 bytes for this function.
+(gcc 4.2, -O3)
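+
+In outline the correction could look like the sketch below (simplified;
+it uses the byte and bit address layout from the figure earlier, and the
+real routine replaces countbits() and the address extraction with table
+lookups):
+
+    static int countbits(unsigned int x)
+    {
+        int n = 0;
+
+        for (; x; x >>= 1)
+            n += x & 1;
+        return n;
+    }
+
+    /* buf is the 256 byte sector. Returns 0 for no error, 1 if one bit
+       was corrected, 2 for an error in the ecc itself, -1 if
+       uncorrectable. */
+    int correct_data(unsigned char *buf, const unsigned char *read_ecc,
+                     const unsigned char *calc_ecc)
+    {
+        unsigned char s0 = read_ecc[0] ^ calc_ecc[0];
+        unsigned char s1 = read_ecc[1] ^ calc_ecc[1];
+        unsigned char s2 = read_ecc[2] ^ calc_ecc[2];
+        unsigned int byteaddr, bitaddr, nbits;
+
+        if ((s0 | s1 | s2) == 0)
+            return 0;
+
+        nbits = countbits((s2 << 16) | (s1 << 8) | s0);
+        if (nbits == 11) {
+            /* single bit error: the odd rp and cp syndrome bits
+               spell out the address of the failing bit */
+            byteaddr = ((s0 >> 1) & 1) | ((s0 >> 2) & 2) |
+                       ((s0 >> 3) & 4) | ((s0 >> 4) & 8) |
+                       ((s1 << 3) & 16) | ((s1 << 2) & 32) |
+                       ((s1 << 1) & 64) | (s1 & 128);
+            bitaddr = ((s2 >> 3) & 1) | ((s2 >> 4) & 2) |
+                      ((s2 >> 5) & 4);
+            buf[byteaddr] ^= 1 << bitaddr;
+            return 1;
+        }
+        if (nbits == 1)
+            return 2;
+        return -1;
+    }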
+
+
+Conclusion
+==========
+
+The gain when calculating the ecc is tremendous. On my development hardware
+a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
+embedded system with a MIPS core a factor 7 was obtained.
+On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
+5 (big endian mode, gcc 4.1.2, -O3)
+For correction not much gain could be obtained (as bitflips are rare). Then
+again there are also much less cycles spent there.
+
+It seems there is not much more gain possible in this, at least when
+programmed in C. Of course it might be possible to squeeze something more
+out of it with an assembler program, but due to pipeline behaviour etc.
+this is very tricky (at least for Intel hw).
+
+Author: Frans Meulenbroeks
+Copyright (C) 2008 Koninklijke Philips Electronics NV.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 5ce0952aa06..49378a9f2b5 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -95,7 +95,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
'p' - Will dump the current registers and flags to your console.
-'q' - Will dump a list of all running timers.
+'q' - Will dump a list of all running hrtimers.
+ WARNING: Does not cover any other timers
'r' - Turns off keyboard raw mode and sets it to XLATE.
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
new file mode 100644
index 00000000000..125eed560e5
--- /dev/null
+++ b/Documentation/vm/unevictable-lru.txt
@@ -0,0 +1,615 @@
+
+This document describes the Linux memory management "Unevictable LRU"
+infrastructure and the use of this infrastructure to manage several types
+of "unevictable" pages. The document attempts to provide the overall
+rationale behind this mechanism and the rationale for some of the design
+decisions that drove the implementation. The latter design rationale is
+discussed in the context of an implementation description. Admittedly, one
+can obtain the implementation details--the "what does it do?"--by reading the
+code. One hopes that the descriptions below add value by providing the answer
+to "why does it do that?".
+
+Unevictable LRU Infrastructure:
+
+The Unevictable LRU adds an additional LRU list to track unevictable pages
+and to hide these pages from vmscan. This mechanism is based on a patch by
+Larry Woodman of Red Hat to address several scalability problems with page
+reclaim in Linux. The problems have been observed at customer sites on large
+memory x86_64 systems. For example, a non-NUMA x86_64 platform with 128GB
+of main memory will have over 32 million 4k pages in a single zone. When a
+large fraction of these pages are not evictable for any reason [see below],
+vmscan will spend a lot of time scanning the LRU lists looking for the small
+fraction of pages that are evictable. This can result in a situation where
+all cpus are spending 100% of their time in vmscan for hours or days on end,
+with the system completely unresponsive.
+
+The Unevictable LRU infrastructure addresses the following classes of
+unevictable pages:
+
++ page owned by ramfs
++ page mapped into SHM_LOCKed shared memory regions
++ page mapped into VM_LOCKED [mlock()ed] vmas
+
+The infrastructure might be able to handle other conditions that make pages
+unevictable, either by definition or by circumstance, in the future.
+
+
+The Unevictable LRU List
+
+The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
+called the "unevictable" list and an associated page flag, PG_unevictable, to
+indicate that the page is being managed on the unevictable list. The
+PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active
+flag in that it indicates on which LRU list a page resides when PG_lru is set.
+The unevictable LRU list is source configurable based on the UNEVICTABLE_LRU
+Kconfig option.
+
+The Unevictable LRU infrastructure maintains unevictable pages on an additional
+LRU list for a few reasons:
+
+1) We get to "treat unevictable pages just like we treat other pages in the
+ system, which means we get to use the same code to manipulate them, the
+ same code to isolate them (for migrate, etc.), the same code to keep track
+ of the statistics, etc..." [Rik van Riel]
+
+2) We want to be able to migrate unevictable pages between nodes--for memory
+ defragmentation, workload management and memory hotplug. The linux kernel
+ can only migrate pages that it can successfully isolate from the lru lists.
+ If we were to maintain pages elsewise than on an lru-like list, where they
+ can be found by isolate_lru_page(), we would prevent their migration, unless
+ we reworked migration code to find the unevictable pages.
+
+
+The unevictable LRU list does not differentiate between file backed and swap
+backed [anon] pages. This differentiation is only important while the pages
+are, in fact, evictable.
+
+The unevictable LRU list benefits from the "arrayification" of the per-zone
+LRU lists and statistics originally proposed and posted by Christoph Lameter.
+
+The unevictable list does not use the lru pagevec mechanism. Rather,
+unevictable pages are placed directly on the page's zone's unevictable
+list under the zone lru_lock. The reason for this is to prevent stranding
+of pages on the unevictable list when one task has the page isolated from the
+lru and other tasks are changing the "evictability" state of the page.
+
+
+Unevictable LRU and Memory Controller Interaction
+
+The memory controller data structure automatically gets a per zone unevictable
+lru list as a result of the "arrayification" of the per-zone LRU lists. The
+memory controller tracks the movement of pages to and from the unevictable list.
+When a memory control group comes under memory pressure, the controller will
+not attempt to reclaim pages on the unevictable list. This has a couple of
+effects. Because the pages are "hidden" from reclaim on the unevictable list,
+the reclaim process can be more efficient, dealing only with pages that have
+a chance of being reclaimed. On the other hand, if too many of the pages
+charged to the control group are unevictable, the evictable portion of the
+working set of the tasks in the control group may not fit into the available
+memory. This can cause the control group to thrash or to oom-kill tasks.
+
+
+Unevictable LRU: Detecting Unevictable Pages
+
+The function page_evictable(page, vma) in vmscan.c determines whether a
+page is evictable or not. For ramfs pages and pages in SHM_LOCKed regions,
+page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's
+address space using a wrapper function. Wrapper functions are used to set,
+clear and test the flag to reduce the requirement for #ifdef's throughout the
+source code. AS_UNEVICTABLE is set on ramfs inode/mapping when it is created.
+This flag remains for the life of the inode.
+
+For shared memory regions, AS_UNEVICTABLE is set when an application
+successfully SHM_LOCKs the region and is removed when the region is
+SHM_UNLOCKed. Note that shmctl(SHM_LOCK, ...) does not populate the page
+tables for the region as does, for example, mlock(). So, we make no special
+effort to push any pages in the SHM_LOCKed region to the unevictable list.
+Vmscan will do this when/if it encounters the pages during reclaim. On
+SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the
+unevictable list if no other condition keeps them unevictable. If a SHM_LOCKed
+region is destroyed, the pages are also "rescued" from the unevictable list in
+the process of freeing them.
+
+page_evictable() detects mlock()ed pages by testing an additional page flag,
+PG_mlocked via the PageMlocked() wrapper. If the page is NOT mlocked, and a
+non-NULL vma is supplied, page_evictable() will check whether the vma is
+VM_LOCKED via is_mlocked_vma(). is_mlocked_vma() will SetPageMlocked() and
+update the appropriate statistics if the vma is VM_LOCKED. This method allows
+efficient "culling" of pages in the fault path that are being faulted in to
+VM_LOCKED vmas.
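+
+In pseudo-C, the test described above boils down to something like this
+simplified sketch (not the literal kernel source):
+
+    int page_evictable(struct page *page, struct vm_area_struct *vma)
+    {
+        if (mapping_unevictable(page_mapping(page)))
+            return 0;   /* ramfs or SHM_LOCKed region */
+        if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
+            return 0;   /* mlocked into a VM_LOCKED vma */
+        return 1;
+    }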
+
+
+Unevictable Pages and Vmscan [shrink_*_list()]
+
+If unevictable pages are culled in the fault path, or moved to the unevictable
+list at mlock() or mmap() time, vmscan will never encounter the pages until
+they have become evictable again, for example, via munlock() and have been
+"rescued" from the unevictable list. However, there may be situations where we
+decide, for the sake of expediency, to leave an unevictable page on one of the
+regular active/inactive LRU lists for vmscan to deal with. Vmscan checks for
+such pages in all of the shrink_{active|inactive|page}_list() functions and
+will "cull" such pages that it encounters--that is, it diverts those pages to
+the unevictable list for the zone being scanned.
+
+There may be situations where a page is mapped into a VM_LOCKED vma, but the
+page is not marked as PageMlocked. Such pages will make it all the way to
+shrink_page_list() where they will be detected when vmscan walks the reverse
+map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list()
+will cull the page at that point.
+
+Note that for anonymous pages, shrink_page_list() attempts to add the page to
+the swap cache before it tries to unmap the page. To avoid this unnecessary
+consumption of swap space, shrink_page_list() calls try_to_munlock() to check
+whether any VM_LOCKED vmas map the page without attempting to unmap the page.
+If try_to_munlock() returns SWAP_MLOCK, shrink_page_list() will cull the page
+without consuming swap space. try_to_munlock() will be described below.
+
+To "cull" an unevictable page, vmscan simply puts the page back on the lru
+list using putback_lru_page()--the inverse operation to isolate_lru_page()--
+after dropping the page lock. Because the condition which makes the page
+unevictable may change once the page is unlocked, putback_lru_page() will
+recheck the unevictable state of a page that it places on the unevictable lru
+list. If the page has become evictable, putback_lru_page() removes it from
+the list and retries, including the page_evictable() test. Because such a
+race is a rare event and movement of pages onto the unevictable list should be
+rare, these extra evictability checks should not occur in the majority of calls
+to putback_lru_page().
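+
+In outline, the recheck is a small retry loop--a sketch only; the real
+function also manages the page reference, the active flag and the event
+counters:
+
+	redo:
+		if (page_evictable(page, NULL)) {
+			lru_cache_add_lru(page, page_lru(page));
+		} else {
+			add_page_to_unevictable_list(page);
+			/* raced with munlock/truncate?  pull it back off and retry */
+			if (page_evictable(page, NULL) && !isolate_lru_page(page))
+				goto redo;
+		}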
+
+
+Mlocked Pages: Prior Work
+
+The "Unevictable Mlocked Pages" infrastructure is based on work originally
+posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
+Nick posted his patch as an alternative to a patch posted by Christoph
+Lameter to achieve the same objective--hiding mlocked pages from vmscan.
+In Nick's patch, he used one of the struct page lru list link fields as a count
+of VM_LOCKED vmas that map the page. This use of the link field for a count
+prevented the management of the pages on an LRU list. Thus, mlocked pages were
+not migratable as isolate_lru_page() could not find them and the lru list link
+field was not available to the migration subsystem. Nick resolved this by
+putting mlocked pages back on the lru list before attempting to isolate them,
+thus abandoning the count of VM_LOCKED vmas. When Nick's patch was integrated
+with the Unevictable LRU work, the count was replaced by walking the reverse
+map to determine whether any VM_LOCKED vmas mapped the page. More on this
+below.
+
+
+Mlocked Pages: Basic Management
+
+Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of
+unevictable pages. When such a page has been "noticed" by the memory
+management subsystem, the page is marked with the PG_mlocked [PageMlocked()]
+flag. A PageMlocked() page will be placed on the unevictable LRU list when
+it is added to the LRU. Pages can be "noticed" by memory management in
+several places:
+
+1) in the mlock()/mlockall() system call handlers.
+2) in the mmap() system call handler when mmap()ing a region with the
+ MAP_LOCKED flag, or mmap()ing a region in a task that has called
+ mlockall() with the MCL_FUTURE flag. Both of these conditions result
+ in the VM_LOCKED flag being set for the vma.
+3) in the fault path, when mlocked pages are "culled" as described above,
+   and when a VM_LOCKED stack segment is expanded.
+4) as mentioned above, in vmscan:shrink_page_list() when attempting to
+ reclaim a page in a VM_LOCKED vma--via try_to_unmap() or try_to_munlock().
+
+Mlocked pages become unlocked and rescued from the unevictable list when:
+
+1) mapped in a range unlocked via the munlock()/munlockall() system calls.
+2) munmap()ed out of the last VM_LOCKED vma that maps the page, including
+ unmapping at task exit.
+3) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file.
+4) before a page is COWed in a VM_LOCKED vma.
+
+
+Mlocked Pages: mlock()/mlockall() System Call Handling
+
+Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
+for each vma in the range specified by the call. In the case of mlockall(),
+this is the entire active address space of the task. Note that mlock_fixup()
+is used for both mlock()ing and munlock()ing a range of memory. A call to
+mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED,
+is treated as a no-op--mlock_fixup() simply returns.
+
+If the vma passes some filtering described in "Mlocked Pages: Filtering Vmas"
+below, mlock_fixup() will attempt to merge the vma with its neighbors or split
+off a subset of the vma if the range does not cover the entire vma. Once the
+vma has been merged or split or neither, mlock_fixup() will call
+__mlock_vma_pages_range() to fault in the pages via get_user_pages() and
+to mark the pages as mlocked via mlock_vma_page().
+
+Note that the vma being mlocked might be mapped with PROT_NONE. In this case,
+get_user_pages() will be unable to fault in the pages. That's OK. If pages
+do end up getting faulted into this VM_LOCKED vma, we'll handle them in the
+fault path or in vmscan.
+
+Also note that a page returned by get_user_pages() could be truncated or
+migrated out from under us, while we're trying to mlock it. To detect
+this, __mlock_vma_pages_range() tests the page_mapping after acquiring
+the page lock. If the page is still associated with its mapping, we'll
+go ahead and call mlock_vma_page(). If the mapping is gone, we just
+unlock the page and move on. Worst case, this results in a page mapped
+in a VM_LOCKED vma remaining on a normal LRU list without being
+PageMlocked(). Again, vmscan will detect and cull such pages.
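+
+The check is a simple test under the page lock; sketched below--the real
+loop in __mlock_vma_pages_range() does this for each page that
+get_user_pages() returned:
+
+	lock_page(page);
+	if (page->mapping)	/* not truncated/migrated out from under us */
+		mlock_vma_page(page);
+	unlock_page(page);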
+
+mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will
+TestSetPageMlocked() for each page returned by get_user_pages(). We use
+TestSetPageMlocked() because the page might already be mlocked by another
+task/vma and we don't want to do extra work. We especially do not want to
+count an mlocked page more than once in the statistics. If the page was
+already mlocked, mlock_vma_page() is done.
+
+If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
+page from the LRU, as it is likely on the appropriate active or inactive list
+at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will
+putback the page--putback_lru_page()--which will notice that the page is now
+mlocked and divert the page to the zone's unevictable LRU list. If
+mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
+it later if/when it attempts to reclaim the page.
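+
+A condensed sketch of mlock_vma_page()--the statistics update is shown in
+abbreviated form; see mm/mlock.c for the real function:
+
+	static void mlock_vma_page(struct page *page)
+	{
+		BUG_ON(!PageLocked(page));
+
+		if (!TestSetPageMlocked(page)) {
+			inc_zone_page_state(page, NR_MLOCK);
+			if (!isolate_lru_page(page))
+				putback_lru_page(page);	/* diverts to unevictable list */
+		}
+	}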
+
+
+Mlocked Pages: Filtering Special Vmas
+
+mlock_fixup() filters several classes of "special" vmas:
+
+1) vmas with VM_IO|VM_PFNMAP set are skipped entirely. The pages behind
+   these mappings are inherently pinned, so we don't need to mark them as
+   mlocked. In any case, most of these pages have no struct page in which
+   to set such a flag. Because of this, get_user_pages() will fail for
+   these vmas, so there is no point in attempting to visit them.
+
+2) vmas mapping hugetlbfs pages are already effectively pinned into memory.
+ We don't need nor want to mlock() these pages. However, to preserve the
+ prior behavior of mlock()--before the unevictable/mlock changes--mlock_fixup()
+ will call make_pages_present() in the hugetlbfs vma range to allocate the
+ huge pages and populate the ptes.
+
+3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of
+ kernel pages, such as the vdso page, relay channel pages, etc. These pages
+ are inherently unevictable and are not managed on the LRU lists.
+ mlock_fixup() treats these vmas the same as hugetlbfs vmas. It calls
+ make_pages_present() to populate the ptes.
+
+Note that for all of these special vmas, mlock_fixup() does not set the
+VM_LOCKED flag. Therefore, we won't have to deal with them later during
+munlock() or munmap()--for example, at task exit. Neither does mlock_fixup()
+account these vmas against the task's "locked_vm".
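+
+In outline, the filtering at the top of mlock_fixup() looks roughly like
+the following--flag names as discussed above, with other details elided:
+
+	if (newflags == vma->vm_flags ||
+	    (vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		goto out;	/* no-op or inherently pinned:  skip */
+
+	if (is_vm_hugetlb_page(vma) ||
+	    (vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED))) {
+		if (lock)
+			make_pages_present(start, end);
+		goto out;	/* don't set VM_LOCKED, don't account */
+	}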
+
+
+Mlocked Pages: Downgrading the Mmap Semaphore
+
+mlock_fixup() must be called with the mmap semaphore held for write, because
+it may have to merge or split vmas. However, mlocking a large region of
+memory can take a long time--especially if vmscan must reclaim pages to
+satisfy the region's requirements. Faulting in a large region with the mmap
+semaphore held for write can hold off other faults on the address space, in
+the case of a multi-threaded task. It can also hold off scans of the task's
+address space via /proc. While testing under heavy load, it was observed that
+the ps(1) command could be held off for many minutes while a large segment was
+mlock()ed down.
+
+To address this issue, and to make the system more responsive during mlock()ing
+of large segments, mlock_fixup() downgrades the mmap semaphore to read mode
+during the call to __mlock_vma_pages_range(). This works fine. However, the
+callers of mlock_fixup() expect the semaphore to be returned in write mode.
+So, mlock_fixup() "upgrades" the semaphore to write mode. Linux does not
+support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore
+and reacquire it in write mode. In a multi-threaded task, it is possible for
+the task memory map to change while the semaphore is dropped. Therefore,
+mlock_fixup() looks up the vma at the range start address after reacquiring
+the semaphore in write mode and verifies that it still covers the original
+range. If not, mlock_fixup() returns an error [-EAGAIN]. All callers of
+mlock_fixup() have been changed to deal with this new error condition.
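+
+The downgrade/"upgrade" sequence is, in sketch form--assuming the usual
+rwsem primitives; the flag argument to __mlock_vma_pages_range() selects
+mlock versus munlock as discussed below:
+
+	downgrade_write(&mm->mmap_sem);
+	ret = __mlock_vma_pages_range(vma, start, end, 1);
+
+	up_read(&mm->mmap_sem);
+	/* "upgrade":  drop and retake for write--the map may change here */
+	down_write(&mm->mmap_sem);
+
+	vma = find_vma(mm, start);
+	if (!vma || vma->vm_start > start || vma->vm_end < end)
+		return -EAGAIN;	/* vma changed while the semaphore was dropped */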
+
+Note: when munlocking a region, all of the pages should already be resident--
+unless we have racing threads mlock()ing and munlock()ing regions. So,
+unlocking should not have to wait for page allocations nor faults of any kind.
+Therefore mlock_fixup() does not downgrade the semaphore for munlock().
+
+
+Mlocked Pages: munlock()/munlockall() System Call Handling
+
+The munlock() and munlockall() system calls are handled by the same functions--
+do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock
+vs lock operation indicated by an argument. So, these system calls are also
+handled by mlock_fixup(). Again, if called for an already munlock()ed vma,
+mlock_fixup() simply returns. Because of the vma filtering discussed above,
+VM_LOCKED will not be set in any "special" vmas. So, these vmas will be
+ignored for munlock.
+
+If the vma is VM_LOCKED, mlock_fixup() again attempts to merge or split off
+the specified range. The range is then munlocked via the function
+__mlock_vma_pages_range()--the same function used to mlock a vma range--
+passing a flag to indicate that munlock() is being performed.
+
+Because the vma access protections could have been changed to PROT_NONE after
+faulting in and mlocking some pages, get_user_pages() was unreliable for visiting
+these pages for munlocking. Because we don't want to leave pages mlocked,
+get_user_pages() was enhanced to accept a flag to ignore the permissions when
+fetching the pages--all of which should be resident as a result of previous
+mlock()ing.
+
+For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling
+munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked
+flag using TestClearPageMlocked(). As with mlock_vma_page(), munlock_vma_page()
+uses the Test*PageMlocked() function to handle the case where the page might
+have already been unlocked by another task. If the page was mlocked,
+munlock_vma_page() updates the zone statistics for the number of mlocked
+pages. Note, however, that at this point we haven't checked whether the page
+is mapped by other VM_LOCKED vmas.
+
+We can't call try_to_munlock(), the function that walks the reverse map to check
+for other VM_LOCKED vmas, without first isolating the page from the LRU.
+try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
+not be on an lru list. [More on these below.] However, the call to
+isolate_lru_page() could fail, in which case we couldn't try_to_munlock().
+So, we go ahead and clear PG_mlocked up front, as this might be the only chance
+we have. If we can successfully isolate the page, we go ahead and
+try_to_munlock(), which will restore the PG_mlocked flag and update the zone
+page statistics if it finds another vma holding the page mlocked. If we fail
+to isolate the page, we'll have left a potentially mlocked page on the LRU.
+This is fine, because we'll catch it later when/if vmscan tries to reclaim the
+page. This should be relatively rare.
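+
+The resulting logic, in sketch form--the real function also handles the
+statistics and putback details:
+
+	static void munlock_vma_page(struct page *page)
+	{
+		BUG_ON(!PageLocked(page));
+
+		if (TestClearPageMlocked(page)) {
+			dec_zone_page_state(page, NR_MLOCK);
+			if (!isolate_lru_page(page)) {
+				/* re-mlocks the page if another VM_LOCKED vma maps it */
+				try_to_munlock(page);
+				putback_lru_page(page);
+			}
+			/* else:  cleared PG_mlocked; vmscan will catch the page */
+		}
+	}
+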
+
+Mlocked Pages: Migrating Them...
+
+A page that is being migrated has been isolated from the lru lists and is
+held locked across unmapping of the page, updating the page's mapping
+[address_space] entry and copying the contents and state, until the
+page table entry has been replaced with an entry that refers to the new
+page. Linux supports migration of mlocked pages and other unevictable
+pages. This involves simply moving the PageMlocked and PageUnevictable states
+from the old page to the new page.
+
+Note that page migration can race with mlocking or munlocking of the same
+page. This has been discussed from the mlock/munlock perspective in the
+respective sections above. Both processes [migration, m[un]locking] hold
+the page locked. This provides the first level of synchronization. Page
+migration zeros out the page_mapping of the old page before unlocking it,
+so m[un]lock can skip these pages by testing the page mapping under page
+lock.
+
+When completing page migration, we place the new and old pages back onto the
+lru after dropping the page lock. The "unneeded" page--old page on success,
+new page on failure--will be freed when the reference count held by the
+migration process is released. To ensure that we don't strand pages on the
+unevictable list because of a race between munlock and migration, page
+migration uses the putback_lru_page() function to add migrated pages back to
+the lru.
+
+
+Mlocked Pages: mmap(MAP_LOCKED) System Call Handling
+
+In addition to the mlock()/mlockall() system calls, an application can request
+that a region of memory be mlocked using the MAP_LOCKED flag with the mmap()
+call. Furthermore, any mmap() call or brk() call that expands the heap by a
+task that has previously called mlockall() with the MCL_FUTURE flag will result
+in the newly mapped memory being mlocked. Before the unevictable/mlock changes,
+the kernel simply called make_pages_present() to allocate pages and populate
+the page table.
+
+To mlock a range of memory under the unevictable/mlock infrastructure, the
+mmap() handler and task address space expansion functions call
+mlock_vma_pages_range() specifying the vma and the address range to mlock.
+mlock_vma_pages_range() filters vmas like mlock_fixup(), as described above in
+"Mlocked Pages: Filtering Vmas". It will clear the VM_LOCKED flag, which will
+have already been set by the caller, in filtered vmas. Thus these vmas need
+not be visited for munlock when the region is unmapped.
+
+For "normal" vmas, mlock_vma_pages_range() calls __mlock_vma_pages_range() to
+fault/allocate the pages and mlock them. Again, like mlock_fixup(),
+mlock_vma_pages_range() downgrades the mmap semaphore to read mode before
+attempting to fault/allocate and mlock the pages; and "upgrades" the semaphore
+back to write mode before returning.
+
+The callers of mlock_vma_pages_range() will have already added the memory
+range to be mlocked to the task's "locked_vm". To account for filtered vmas,
+mlock_vma_pages_range() returns the number of pages NOT mlocked. All of the
+callers then subtract a non-negative return value from the task's locked_vm.
+A negative return value represents an error--for example, from get_user_pages()
+attempting to fault in a vma with PROT_NONE access. In this case, we leave
+the memory range accounted as locked_vm, as the protections could be changed
+later and pages allocated into that region.
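+
+From the caller's side, the accounting convention is roughly the following--
+a sketch; mm->locked_vm is the field backing the task's locked_vm count:
+
+	mm->locked_vm += (end - start) >> PAGE_SHIFT;
+
+	ret = mlock_vma_pages_range(vma, start, end);
+	if (ret > 0)
+		mm->locked_vm -= ret;	/* pages in filtered vmas */
+	/* ret < 0:  error; leave the range accounted as locked_vm */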
+
+
+Mlocked Pages: munmap()/exit()/exec() System Call Handling
+
+When unmapping an mlocked region of memory, whether by an explicit call to
+munmap() or via an internal unmap from exit() or exec() processing, we must
+munlock the pages if we're removing the last VM_LOCKED vma that maps the pages.
+Before the unevictable/mlock changes, mlocking did not mark the pages in any way,
+so unmapping them required no processing.
+
+To munlock a range of memory under the unevictable/mlock infrastructure, the
+munmap() handler and task address space tear down function call
+munlock_vma_pages_all(). The name reflects the observation that one always
+specifies the entire vma range when munlock()ing during unmap of a region.
+Because of the vma filtering when mlock()ing regions, only "normal" vmas that
+actually contain mlocked pages will be passed to munlock_vma_pages_all().
+
+munlock_vma_pages_all() clears the VM_LOCKED vma flag and, like mlock_fixup()
+for the munlock case, calls __munlock_vma_pages_range() to walk the page table
+for the vma's memory range and munlock_vma_page() each resident page mapped by
+the vma. This effectively munlocks the page, but only if this is the last
+VM_LOCKED vma that maps the page.
+
+
+Mlocked Pages: try_to_unmap()
+
+[Note: the code changes represented by this section are really quite small
+compared to the text needed to describe what is happening and why, and to
+discuss the implications.]
+
+Pages can, of course, be mapped into multiple vmas. Some of these vmas may
+have VM_LOCKED flag set. It is possible for a page mapped into one or more
+VM_LOCKED vmas not to have the PG_mlocked flag set and therefore reside on one
+of the active or inactive LRU lists. This could happen if, for example, a
+task in the process of munlock()ing the page could not isolate the page from
+the LRU. As a result, vmscan/shrink_page_list() might encounter such a page
+as described in "Unevictable Pages and Vmscan [shrink_*_list()]". To
+handle this situation, try_to_unmap() has been enhanced to check for VM_LOCKED
+vmas while it is walking a page's reverse map.
+
+try_to_unmap() is always called--whether by vmscan for reclaim or for page
+migration--with the argument page locked and isolated from the LRU. BUG_ON()
+assertions enforce this requirement. Separate functions handle anonymous and
+mapped file pages, as these types of pages have different reverse map
+mechanisms.
+
+ try_to_unmap_anon()
+
+To unmap anonymous pages, each vma in the list anchored in the anon_vma must be
+visited--at least until a VM_LOCKED vma is encountered. If the page is being
+unmapped for migration, VM_LOCKED vmas do not stop the process because mlocked
+pages are migratable. However, for reclaim, if the page is mapped into a
+VM_LOCKED vma, the scan stops. try_to_unmap() attempts to acquire the mmap
+semaphore of the mm_struct to which the vma belongs in read mode. If this is
+successful, try_to_unmap() will mlock the page via mlock_vma_page()--we
+wouldn't have gotten to try_to_unmap() if the page were already mlocked--and
+will return SWAP_MLOCK, indicating that the page is unevictable. If the
+mmap semaphore cannot be acquired, we are not sure whether the page is really
+unevictable or not. In this case, try_to_unmap() will return SWAP_AGAIN.
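+
+Per vma on the anon_vma list, the check is roughly as follows--a sketch
+only; the "migration" flag name is illustrative rather than the exact
+parameter in mm/rmap.c:
+
+	if ((vma->vm_flags & VM_LOCKED) && !migration) {
+		if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
+			mlock_vma_page(page);	/* page is locked and off the LRU */
+			up_read(&vma->vm_mm->mmap_sem);
+			return SWAP_MLOCK;	/* definitely unevictable */
+		}
+		return SWAP_AGAIN;	/* can't tell--have vmscan retry */
+	}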
+
+ try_to_unmap_file() -- linear mappings
+
+Unmapping of a mapped file page works the same, except that the scan visits
+all vmas that map the page's index/page offset in the page's mapping's
+reverse map priority search tree. It must also visit each vma in the page's
+mapping's non-linear list, if the list is non-empty. As for anonymous pages,
+on encountering a VM_LOCKED vma for a mapped file page, try_to_unmap() will
+attempt to acquire the associated mm_struct's mmap semaphore to mlock the page,
+returning SWAP_MLOCK if this is successful, and SWAP_AGAIN, if not.
+
+ try_to_unmap_file() -- non-linear mappings
+
+If a page's mapping contains a non-empty non-linear mapping vma list, then
+try_to_un{map|lock}() must also visit each vma in that list to determine
+whether the page is mapped in a VM_LOCKED vma. Again, the scan must visit
+all vmas in the non-linear list to ensure that the page is not/should not be
+mlocked. If a VM_LOCKED vma is found in the list, the scan could terminate.
+However, there is no easy way to determine whether the page is actually mapped
+in a given vma--either for unmapping or testing whether the VM_LOCKED vma
+actually pins the page.
+
+So, try_to_unmap_file() handles non-linear mappings by scanning a certain
+number of pages--a "cluster"--in each non-linear vma associated with the page's
+mapping, for each file mapped page that vmscan tries to unmap. If this happens
+to unmap the page we're trying to unmap, try_to_unmap() will notice this on
+return--(page_mapcount(page) == 0)--and return SWAP_SUCCESS. Otherwise, it
+will return SWAP_AGAIN, causing vmscan to recirculate this page. We take
+advantage of the cluster scan in try_to_unmap_cluster() as follows:
+
+For each non-linear vma, try_to_unmap_cluster() attempts to acquire the mmap
+semaphore of the associated mm_struct for read without blocking. If this
+attempt is successful and the vma is VM_LOCKED, try_to_unmap_cluster() will
+retain the mmap semaphore for the scan; otherwise it drops it here. Then,
+for each page in the cluster, if we're holding the mmap semaphore for a locked
+vma, try_to_unmap_cluster() calls mlock_vma_page() to mlock the page. This
+call is a no-op if the page is already mlocked, but it will mlock any pages
+in the non-linear mapping that are not yet mlocked. If one of the pages so
+mlocked is the page passed in to try_to_unmap(), try_to_unmap_cluster() will
+return SWAP_MLOCK, rather than the default SWAP_AGAIN. This will allow vmscan
+to cull the page, rather than recirculating it on the inactive list. Again,
+if try_to_unmap_cluster() cannot acquire the vma's mmap sem, it returns
+SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED vma, but
+couldn't be mlocked.
+
+
+Mlocked Pages: try_to_munlock() Reverse Map Scan
+
+TODO/FIXME: a better name might be page_mlocked()--analogous to the
+page_referenced() reverse map walker--especially if we continue to call this
+from shrink_page_list(). See related TODO/FIXME below.
+
+When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall() System
+Call Handling" above--tries to munlock a page, or when shrink_page_list()
+encounters an anonymous page that is not yet in the swap cache, they need to
+determine whether or not the page is mapped by any VM_LOCKED vma, without
+actually attempting to unmap all ptes from the page. For this purpose, the
+unevictable/mlock infrastructure introduced a variant of try_to_unmap() called
+try_to_munlock().
+
+try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
+mapped file pages with an additional argument specifying unlock versus unmap
+processing. Again, these functions walk the respective reverse maps looking
+for VM_LOCKED vmas. When such a vma is found for anonymous pages and file
+pages mapped in linear VMAs, as in the try_to_unmap() case, the functions
+attempt to acquire the associated mmap semaphore, mlock the page via
+mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the
+pre-clearing of the page's PG_mlocked done by munlock_vma_page() and informs
+shrink_page_list() that the anonymous page should be culled rather than added
+to the swap cache in preparation for a try_to_unmap() that will almost
+certainly fail.
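+
+The top-level function is then a thin wrapper--sketched here; the second
+argument is the unlock flag that threads down into the shared scan
+functions:
+
+	int try_to_munlock(struct page *page)
+	{
+		VM_BUG_ON(!PageLocked(page) || PageLRU(page));
+
+		if (PageAnon(page))
+			return try_to_unmap_anon(page, 1);	/* unlock only */
+		else
+			return try_to_unmap_file(page, 1);
+	}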
+
+If try_to_munlock() is unable to acquire a VM_LOCKED vma's associated mmap
+semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list()
+to recycle the page on the inactive list and hope that it has better luck
+with the page next time.
+
+For file pages mapped into non-linear vmas, the try_to_munlock() logic works
+slightly differently. On encountering a VM_LOCKED non-linear vma that might
+map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking
+the page. munlock_vma_page() will just leave the page unlocked and let
+vmscan deal with it--the usual fallback position.
+
+Note that try_to_munlock()'s reverse map walk must visit every vma in a page's
+reverse map to determine that a page is NOT mapped into any VM_LOCKED vma.
+However, the scan can terminate when it encounters a VM_LOCKED vma and can
+successfully acquire the vma's mmap semaphore for read and mlock the page.
+Although try_to_munlock() can be called many [very many!] times when
+munlock()ing a large region or tearing down a large address space that has been
+mlocked via mlockall(), overall this is a fairly rare event. In addition,
+although shrink_page_list() calls try_to_munlock() for every anonymous page that
+it handles that is not yet in the swap cache, on average anonymous pages will
+have very short reverse map lists.
+
+Mlocked Pages: Page Reclaim in shrink_*_list()
+
+shrink_active_list() culls any obviously unevictable pages--i.e.,
+!page_evictable(page, NULL)--diverting these to the unevictable lru
+list. However, shrink_active_list() only sees unevictable pages that
+made it onto the active/inactive lru lists. Note that these pages do not
+have PageUnevictable set--otherwise, they would be on the unevictable list and
+shrink_active_list() would never see them.
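+
+The cull itself is a short-circuit at the top of the per-page loop,
+roughly--a sketch, with locking and statistics omitted:
+
+	/* in shrink_active_list(), for each page taken off the list */
+	if (unlikely(!page_evictable(page, NULL))) {
+		putback_lru_page(page);	/* diverted to the unevictable list */
+		continue;
+	}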
+
+Some examples of these unevictable pages on the LRU lists are:
+
+1) ramfs pages that have been placed on the lru lists when first allocated.
+
+2) SHM_LOCKed shared memory pages. shmctl(SHM_LOCK) does not attempt to
+   allocate or fault in the pages in the shared memory region. The pages
+   are allocated and added to the regular LRU lists when an application
+   first accesses them after SHM_LOCKing the segment.
+
+3) Mlocked pages that could not be isolated from the lru and moved to the
+ unevictable list in mlock_vma_page().
+
+4) Pages mapped into multiple VM_LOCKED vmas, but try_to_munlock() couldn't
+ acquire the vma's mmap semaphore to test the flags and set PageMlocked.
+ munlock_vma_page() was forced to let the page back on to the normal
+ LRU list for vmscan to handle.
+
+shrink_inactive_list() also culls any unevictable pages that it finds
+on the inactive lists, again diverting them to the appropriate zone's unevictable
+lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became
+SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or
+pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from
+the lru to recheck via try_to_munlock(). shrink_inactive_list() won't notice
+the latter, but will pass such pages on to shrink_page_list().
+
+shrink_page_list() again culls obviously unevictable pages that it could
+encounter for similar reasons to shrink_inactive_list(). As already discussed,
+shrink_page_list() proactively looks for anonymous pages that should have
+PG_mlocked set but don't--these would not be detected by page_evictable()--to
+avoid adding them to the swap cache unnecessarily. File pages mapped into
+VM_LOCKED vmas but without PG_mlocked set will make it all the way to
+try_to_unmap(). shrink_page_list() will divert them to the unevictable list when
+try_to_unmap() returns SWAP_MLOCK, as discussed above.
+
+TODO/FIXME: If we can enhance the swap cache to reliably remove entries
+with page_count(page) > 2, as long as all ptes are mapped to the page and
+not the swap entry, we can probably remove the call to try_to_munlock() in
+shrink_page_list() and just remove the page from the swap cache when
+try_to_unmap() returns SWAP_MLOCK. Currently, remove_exclusive_swap_page()
+doesn't seem to allow that.
+
+
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index a0f642b6a4b..6110197757a 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -70,6 +70,7 @@ config AUTO_IRQ_AFFINITY
default y
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
menu "System setup"
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 15fda434442..d069526bd76 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -74,12 +74,14 @@ register struct thread_info *__current_thread_info __asm__("$8");
#define TIF_UAC_SIGBUS 7
#define TIF_MEMDIE 8
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
+#define TIF_FREEZE 16 /* is freezing for suspend */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE (1<<TIF_FREEZE)
/* Work to do on interrupt/exception return. */
#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED)
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index 04dcc5e5d4c..9cd8dca742a 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -655,7 +655,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
case 0x71: /* RTC_PORT(1) */
rtc_access.index = index;
- rtc_access.data = BCD_TO_BIN(b);
+ rtc_access.data = bcd2bin(b);
rtc_access.function = 0x48 + !write; /* GET/PUT_TOY */
#ifdef CONFIG_SMP
@@ -668,7 +668,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
#else
__marvel_access_rtc(&rtc_access);
#endif
- ret = BIN_TO_BCD(rtc_access.data);
+ ret = bin2bcd(rtc_access.data);
break;
default:
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 75480cab089..e6a231435cb 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -346,12 +346,12 @@ time_init(void)
year = CMOS_READ(RTC_YEAR);
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
+ sec = bcd2bin(sec);
+ min = bcd2bin(min);
+ hour = bcd2bin(hour);
+ day = bcd2bin(day);
+ mon = bcd2bin(mon);
+ year = bcd2bin(year);
}
/* PC-like is standard; used for year >= 70 */
@@ -525,7 +525,7 @@ set_rtc_mmss(unsigned long nowtime)
cmos_minutes = CMOS_READ(RTC_MINUTES);
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- BCD_TO_BIN(cmos_minutes);
+ cmos_minutes = bcd2bin(cmos_minutes);
/*
* since we're only adjusting minutes and seconds,
@@ -543,8 +543,8 @@ set_rtc_mmss(unsigned long nowtime)
if (abs(real_minutes - cmos_minutes) < 30) {
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- BIN_TO_BCD(real_seconds);
- BIN_TO_BCD(real_minutes);
+ real_seconds = bin2bcd(real_seconds);
+ real_minutes = bin2bcd(real_minutes);
}
CMOS_WRITE(real_seconds,RTC_SECONDS);
CMOS_WRITE(real_minutes,RTC_MINUTES);
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4853f9df37b..df39d20f742 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -192,6 +192,8 @@ config VECTORS_BASE
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "System Type"
choice
diff --git a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
index eb4b190b665..eb35fca9aea 100644
--- a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
+++ b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
@@ -4,6 +4,43 @@
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
+struct pxa3xx_nand_timing {
+ unsigned int tCH; /* Enable signal hold time */
+ unsigned int tCS; /* Enable signal setup time */
+ unsigned int tWH; /* ND_nWE high duration */
+ unsigned int tWP; /* ND_nWE pulse time */
+ unsigned int tRH; /* ND_nRE high duration */
+ unsigned int tRP; /* ND_nRE pulse width */
+ unsigned int tR; /* ND_nWE high to ND_nRE low for read */
+ unsigned int tWHR; /* ND_nWE high to ND_nRE low for status read */
+ unsigned int tAR; /* ND_ALE low to ND_nRE low delay */
+};
+
+struct pxa3xx_nand_cmdset {
+ uint16_t read1;
+ uint16_t read2;
+ uint16_t program;
+ uint16_t read_status;
+ uint16_t read_id;
+ uint16_t erase;
+ uint16_t reset;
+ uint16_t lock;
+ uint16_t unlock;
+ uint16_t lock_status;
+};
+
+struct pxa3xx_nand_flash {
+ const struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
+ const struct pxa3xx_nand_cmdset *cmdset;
+
+ uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
+ uint32_t page_size; /* Page size in bytes (PAGE_SZ) */
+ uint32_t flash_width; /* Width of Flash memory (DWIDTH_M) */
+ uint32_t dfc_width; /* Width of flash controller(DWIDTH_C) */
+ uint32_t num_blocks; /* Number of physical blocks in Flash */
+ uint32_t chip_id;
+};
+
struct pxa3xx_nand_platform_data {
/* the data flash bus is shared between the Static Memory
@@ -12,8 +49,11 @@ struct pxa3xx_nand_platform_data {
*/
int enable_arbiter;
- struct mtd_partition *parts;
- unsigned int nr_parts;
+ const struct mtd_partition *parts;
+ unsigned int nr_parts;
+
+ const struct pxa3xx_nand_flash * flash;
+ size_t num_flash;
};
extern void pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info);
diff --git a/arch/arm/plat-mxc/include/mach/mxc_nand.h b/arch/arm/plat-mxc/include/mach/mxc_nand.h
new file mode 100644
index 00000000000..2b972df22d1
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/mxc_nand.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Sascha Hauer, kernel@pengutronix.de
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __ASM_ARCH_NAND_H
+#define __ASM_ARCH_NAND_H
+
+struct mxc_nand_platform_data {
+ int width; /* data bus width in bytes */
+ int hw_ecc; /* 0 if supress hardware ECC */
+};
+#endif /* __ASM_ARCH_NAND_H */
diff --git a/arch/arm/plat-omap/include/mach/onenand.h b/arch/arm/plat-omap/include/mach/onenand.h
index d57f20226b2..4649d302c26 100644
--- a/arch/arm/plat-omap/include/mach/onenand.h
+++ b/arch/arm/plat-omap/include/mach/onenand.h
@@ -16,6 +16,10 @@ struct omap_onenand_platform_data {
int gpio_irq;
struct mtd_partition *parts;
int nr_parts;
- int (*onenand_setup)(void __iomem *);
+ int (*onenand_setup)(void __iomem *, int freq);
int dma_channel;
};
+
+int omap2_onenand_rephase(void);
+
+#define ONENAND_MAX_PARTITIONS 8
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index 7c239a91627..33a5b2969eb 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -72,6 +72,8 @@ config GENERIC_BUG
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "System Type and features"
source "kernel/time/Kconfig"
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index 294b25f9323..4442f8d2d42 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -96,6 +96,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
#define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
+#define _TIF_FREEZE (1 << TIF_FREEZE)
/* Note: The masks below must never span more than 16 bits! */
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 8102c79aaa9..29e71ed6b8a 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -64,8 +64,11 @@ config HARDWARE_PM
depends on OPROFILE
source "init/Kconfig"
+
source "kernel/Kconfig.preempt"
+source "kernel/Kconfig.freezer"
+
menu "Blackfin Processor Options"
comment "Processor and Board Settings"
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 9389d38f222..07335e719bf 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -62,6 +62,8 @@ config HZ
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "General setup"
source "fs/Kconfig.binfmt"
diff --git a/arch/cris/arch-v10/drivers/ds1302.c b/arch/cris/arch-v10/drivers/ds1302.c
index c9aa3904be0..3bdfaf43390 100644
--- a/arch/cris/arch-v10/drivers/ds1302.c
+++ b/arch/cris/arch-v10/drivers/ds1302.c
@@ -215,12 +215,12 @@ get_rtc_time(struct rtc_time *rtc_tm)
local_irq_restore(flags);
- BCD_TO_BIN(rtc_tm->tm_sec);
- BCD_TO_BIN(rtc_tm->tm_min);
- BCD_TO_BIN(rtc_tm->tm_hour);
- BCD_TO_BIN(rtc_tm->tm_mday);
- BCD_TO_BIN(rtc_tm->tm_mon);
- BCD_TO_BIN(rtc_tm->tm_year);
+ rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+ rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+ rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+ rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+ rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+ rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
/*
* Account for differences between how the RTC uses the values
@@ -295,12 +295,12 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
else
yrs -= 1900; /* RTC (70, 71, ... 99) */
- BIN_TO_BCD(sec);
- BIN_TO_BCD(min);
- BIN_TO_BCD(hrs);
- BIN_TO_BCD(day);
- BIN_TO_BCD(mon);
- BIN_TO_BCD(yrs);
+ sec = bin2bcd(sec);
+ min = bin2bcd(min);
+ hrs = bin2bcd(hrs);
+ day = bin2bcd(day);
+ mon = bin2bcd(mon);
+ yrs = bin2bcd(yrs);
local_irq_save(flags);
CMOS_WRITE(yrs, RTC_YEAR);
diff --git a/arch/cris/arch-v10/drivers/pcf8563.c b/arch/cris/arch-v10/drivers/pcf8563.c
index 8769dc91407..1e90c1a9c84 100644
--- a/arch/cris/arch-v10/drivers/pcf8563.c
+++ b/arch/cris/arch-v10/drivers/pcf8563.c
@@ -122,7 +122,7 @@ get_rtc_time(struct rtc_time *tm)
"information is no longer guaranteed!\n", PCF8563_NAME);
}
- tm->tm_year = BCD_TO_BIN(tm->tm_year) +
+ tm->tm_year = bcd2bin(tm->tm_year) +
((tm->tm_mon & 0x80) ? 100 : 0);
tm->tm_sec &= 0x7F;
tm->tm_min &= 0x7F;
@@ -131,11 +131,11 @@ get_rtc_time(struct rtc_time *tm)
tm->tm_wday &= 0x07; /* Not coded in BCD. */
tm->tm_mon &= 0x1F;
- BCD_TO_BIN(tm->tm_sec);
- BCD_TO_BIN(tm->tm_min);
- BCD_TO_BIN(tm->tm_hour);
- BCD_TO_BIN(tm->tm_mday);
- BCD_TO_BIN(tm->tm_mon);
+ tm->tm_sec = bcd2bin(tm->tm_sec);
+ tm->tm_min = bcd2bin(tm->tm_min);
+ tm->tm_hour = bcd2bin(tm->tm_hour);
+ tm->tm_mday = bcd2bin(tm->tm_mday);
+ tm->tm_mon = bcd2bin(tm->tm_mon);
tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
}
@@ -282,12 +282,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
century = (tm.tm_year >= 2000) ? 0x80 : 0;
tm.tm_year = tm.tm_year % 100;
- BIN_TO_BCD(tm.tm_year);
- BIN_TO_BCD(tm.tm_mon);
- BIN_TO_BCD(tm.tm_mday);
- BIN_TO_BCD(tm.tm_hour);
- BIN_TO_BCD(tm.tm_min);
- BIN_TO_BCD(tm.tm_sec);
+ tm.tm_year = bin2bcd(tm.tm_year);
+ tm.tm_mon = bin2bcd(tm.tm_mon);
+ tm.tm_mday = bin2bcd(tm.tm_mday);
+ tm.tm_hour = bin2bcd(tm.tm_hour);
+ tm.tm_min = bin2bcd(tm.tm_min);
+ tm.tm_sec = bin2bcd(tm.tm_sec);
tm.tm_mon |= century;
mutex_lock(&rtc_lock);
diff --git a/arch/cris/arch-v32/drivers/pcf8563.c b/arch/cris/arch-v32/drivers/pcf8563.c
index f263ab57122..f4478506e52 100644
--- a/arch/cris/arch-v32/drivers/pcf8563.c
+++ b/arch/cris/arch-v32/drivers/pcf8563.c
@@ -118,7 +118,7 @@ get_rtc_time(struct rtc_time *tm)
"information is no longer guaranteed!\n", PCF8563_NAME);
}
- tm->tm_year = BCD_TO_BIN(tm->tm_year) +
+ tm->tm_year = bcd2bin(tm->tm_year) +
((tm->tm_mon & 0x80) ? 100 : 0);
tm->tm_sec &= 0x7F;
tm->tm_min &= 0x7F;
@@ -127,11 +127,11 @@ get_rtc_time(struct rtc_time *tm)
tm->tm_wday &= 0x07; /* Not coded in BCD. */
tm->tm_mon &= 0x1F;
- BCD_TO_BIN(tm->tm_sec);
- BCD_TO_BIN(tm->tm_min);
- BCD_TO_BIN(tm->tm_hour);
- BCD_TO_BIN(tm->tm_mday);
- BCD_TO_BIN(tm->tm_mon);
+ tm->tm_sec = bcd2bin(tm->tm_sec);
+ tm->tm_min = bcd2bin(tm->tm_min);
+ tm->tm_hour = bcd2bin(tm->tm_hour);
+ tm->tm_mday = bcd2bin(tm->tm_mday);
+ tm->tm_mon = bcd2bin(tm->tm_mon);
tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
}
@@ -279,12 +279,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
century = (tm.tm_year >= 2000) ? 0x80 : 0;
tm.tm_year = tm.tm_year % 100;
- BIN_TO_BCD(tm.tm_year);
- BIN_TO_BCD(tm.tm_mon);
- BIN_TO_BCD(tm.tm_mday);
- BIN_TO_BCD(tm.tm_hour);
- BIN_TO_BCD(tm.tm_min);
- BIN_TO_BCD(tm.tm_sec);
+ tm.tm_year = bin2bcd(tm.tm_year);
+ tm.tm_mon = bin2bcd(tm.tm_mon);
+ tm.tm_mday = bin2bcd(tm.tm_mday);
+ tm.tm_hour = bin2bcd(tm.tm_hour);
+ tm.tm_min = bin2bcd(tm.tm_min);
+ tm.tm_sec = bin2bcd(tm.tm_sec);
tm.tm_mon |= century;
mutex_lock(&rtc_lock);
diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c
index ff4c6aa75de..074fe7dea96 100644
--- a/arch/cris/kernel/time.c
+++ b/arch/cris/kernel/time.c
@@ -127,7 +127,7 @@ int set_rtc_mmss(unsigned long nowtime)
return 0;
cmos_minutes = CMOS_READ(RTC_MINUTES);
- BCD_TO_BIN(cmos_minutes);
+ cmos_minutes = bcd2bin(cmos_minutes);
/*
* since we're only adjusting minutes and seconds,
@@ -142,8 +142,8 @@ int set_rtc_mmss(unsigned long nowtime)
real_minutes %= 60;
if (abs(real_minutes - cmos_minutes) < 30) {
- BIN_TO_BCD(real_seconds);
- BIN_TO_BCD(real_minutes);
+ real_seconds = bin2bcd(real_seconds);
+ real_minutes = bin2bcd(real_minutes);
CMOS_WRITE(real_seconds,RTC_SECONDS);
CMOS_WRITE(real_minutes,RTC_MINUTES);
} else {
@@ -170,12 +170,12 @@ get_cmos_time(void)
mon = CMOS_READ(RTC_MONTH);
year = CMOS_READ(RTC_YEAR);
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
+ sec = bcd2bin(sec);
+ min = bcd2bin(min);
+ hour = bcd2bin(hour);
+ day = bcd2bin(day);
+ mon = bcd2bin(mon);
+ year = bcd2bin(year);
if ((year += 1900) < 1970)
year += 100;
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index a5aac1b0756..9d1552a9ee2 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -66,6 +66,8 @@ mainmenu "Fujitsu FR-V Kernel Configuration"
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "Fujitsu FR-V system setup"
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index c7966746fbf..bd1995403c6 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -90,6 +90,8 @@ config HZ
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
source "arch/h8300/Kconfig.cpu"
menu "Executable file formats"
diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h
index aafd4d322ec..700014d2155 100644
--- a/arch/h8300/include/asm/thread_info.h
+++ b/arch/h8300/include/asm/thread_info.h
@@ -89,6 +89,7 @@ static inline struct thread_info *current_thread_info(void)
TIF_NEED_RESCHED */
#define TIF_MEMDIE 4
#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
+#define TIF_FREEZE 16 /* is freezing for suspend */
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -96,6 +97,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE (1<<TIF_FREEZE)
#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3b7aa38254a..912c57db2d2 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -7,6 +7,8 @@ mainmenu "IA-64 Linux Kernel Configuration"
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "Processor type and features"
config IA64
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 4956be40d7b..d98f0f4ff83 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -2070,14 +2070,13 @@ sba_init(void)
if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
return 0;
-#if defined(CONFIG_IA64_GENERIC) && defined(CONFIG_CRASH_DUMP) && \
- defined(CONFIG_PROC_FS)
+#if defined(CONFIG_IA64_GENERIC)
/* If we are booting a kdump kernel, the sba_iommu will
* cause devices that were not shutdown properly to MCA
* as soon as they are turned back on. Our only option for
* a successful kdump kernel boot is to use the swiotlb.
*/
- if (elfcorehdr_addr < ELFCORE_ADDR_MAX) {
+ if (is_kdump_kernel()) {
if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
panic("Unable to initialize software I/O TLB:"
" Try machvec=dig boot option");
diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c
index da60e90eeeb..23e91290e41 100644
--- a/arch/ia64/kernel/crash_dump.c
+++ b/arch/ia64/kernel/crash_dump.c
@@ -8,10 +8,14 @@
#include <linux/errno.h>
#include <linux/types.h>
+#include <linux/crash_dump.h>
#include <asm/page.h>
#include <asm/uaccess.h>
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
/**
* copy_oldmem_page - copy one page from "oldmem"
* @pfn: page frame number to be copied
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 51b75cea701..efaff15d8cf 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1335,7 +1335,7 @@ kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n)
}
#endif
-#ifdef CONFIG_PROC_VMCORE
+#ifdef CONFIG_CRASH_DUMP
/* locate the size find a the descriptor at a certain address */
unsigned long __init
vmcore_find_descriptor_size (unsigned long address)
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index de636b21567..916ba898237 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -352,7 +352,7 @@ reserve_memory (void)
}
#endif
-#ifdef CONFIG_PROC_VMCORE
+#ifdef CONFIG_CRASH_KERNEL
if (reserve_elfcorehdr(&rsvd_region[n].start,
&rsvd_region[n].end) == 0)
n++;
@@ -478,7 +478,12 @@ static __init int setup_nomca(char *s)
}
early_param("nomca", setup_nomca);
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+#ifdef CONFIG_CRASH_DUMP
/* elfcorehdr= specifies the location of elf core header
* stored by the crashed kernel.
*/
@@ -502,11 +507,11 @@ int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
* to work properly.
*/
- if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
+ if (!is_vmcore_usable())
return -EINVAL;
if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
- elfcorehdr_addr = ELFCORE_ADDR_MAX;
+ vmcore_unusable();
return -EINVAL;
}
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index f482a9098e3..054bcd9439a 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -700,23 +700,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
return ret;
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
- unsigned long start_pfn, end_pfn;
- unsigned long timeout = 120 * HZ;
- int ret;
- start_pfn = start >> PAGE_SHIFT;
- end_pfn = start_pfn + (size >> PAGE_SHIFT);
- ret = offline_pages(start_pfn, end_pfn, timeout);
- if (ret)
- goto out;
- /* we can free mem_map at this point */
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(remove_memory);
-#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif
/*
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 00289c178f8..dbaed4a6381 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -42,6 +42,8 @@ config HZ
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "Processor type and features"
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 677c93a490f..836fb66f080 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -62,6 +62,8 @@ mainmenu "Linux/68k Kernel Configuration"
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "Platform dependent setup"
config EISA
diff --git a/arch/m68k/bvme6000/rtc.c b/arch/m68k/bvme6000/rtc.c
index 808c9018b11..c50bec8aabb 100644
--- a/arch/m68k/bvme6000/rtc.c
+++ b/arch/m68k/bvme6000/rtc.c
@@ -18,7 +18,6 @@
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/mc146818rtc.h> /* For struct rtc_time and ioctls, etc */
-#include <linux/smp_lock.h>
#include <linux/bcd.h>
#include <asm/bvme6000hw.h>
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index 0a8998315e5..76b66feb74d 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -75,6 +75,8 @@ config NO_IOPORT
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "Processor type and features"
choice
diff --git a/arch/m68knommu/include/asm/thread_info.h b/arch/m68knommu/include/asm/thread_info.h
index 0c9bc095f3f..82529f424ea 100644
--- a/arch/m68knommu/include/asm/thread_info.h
+++ b/arch/m68knommu/include/asm/thread_info.h
@@ -84,12 +84,14 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
TIF_NEED_RESCHED */
#define TIF_MEMDIE 4
+#define TIF_FREEZE 16 /* is freezing for suspend */
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE (1<<TIF_FREEZE)
#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b905744d791..5f149b030c0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1885,6 +1885,8 @@ config PROBE_INITRD_HEADER
add initrd or initramfs image to the kernel image.
Otherwise, say N.
+source "kernel/Kconfig.freezer"
+
menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)"
config HW_HAS_EISA
diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index 3965fda94a8..1359c03ded5 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -45,12 +45,12 @@ unsigned long read_persistent_clock(void)
spin_unlock_irqrestore(&rtc_lock, flags);
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- sec = BCD2BIN(sec);
- min = BCD2BIN(min);
- hour = BCD2BIN(hour);
- day = BCD2BIN(day);
- mon = BCD2BIN(mon);
- year = BCD2BIN(year);
+ sec = bcd2bin(sec);
+ min = bcd2bin(min);
+ hour = bcd2bin(hour);
+ day = bcd2bin(day);
+ mon = bcd2bin(mon);
+ year = bcd2bin(year);
}
year += real_year - 72 + 2000;
@@ -83,7 +83,7 @@ int rtc_mips_set_mmss(unsigned long nowtime)
cmos_minutes = CMOS_READ(RTC_MINUTES);
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- cmos_minutes = BCD2BIN(cmos_minutes);
+ cmos_minutes = bcd2bin(cmos_minutes);
/*
* since we're only adjusting minutes and seconds,
@@ -99,8 +99,8 @@ int rtc_mips_set_mmss(unsigned long nowtime)
if (abs(real_minutes - cmos_minutes) < 30) {
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- real_seconds = BIN2BCD(real_seconds);
- real_minutes = BIN2BCD(real_minutes);
+ real_seconds = bin2bcd(real_seconds);
+ real_minutes = bin2bcd(real_minutes);
}
CMOS_WRITE(real_seconds, RTC_SECONDS);
CMOS_WRITE(real_minutes, RTC_MINUTES);
diff --git a/arch/mips/include/asm/mc146818-time.h b/arch/mips/include/asm/mc146818-time.h
index cdc379a0a94..199b45733a9 100644
--- a/arch/mips/include/asm/mc146818-time.h
+++ b/arch/mips/include/asm/mc146818-time.h
@@ -44,7 +44,7 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
cmos_minutes = CMOS_READ(RTC_MINUTES);
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- BCD_TO_BIN(cmos_minutes);
+ cmos_minutes = bcd2bin(cmos_minutes);
/*
* since we're only adjusting minutes and seconds,
@@ -60,8 +60,8 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
if (abs(real_minutes - cmos_minutes) < 30) {
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- BIN_TO_BCD(real_seconds);
- BIN_TO_BCD(real_minutes);
+ real_seconds = bin2bcd(real_seconds);
+ real_minutes = bin2bcd(real_minutes);
}
CMOS_WRITE(real_seconds, RTC_SECONDS);
CMOS_WRITE(real_minutes, RTC_MINUTES);
@@ -103,12 +103,12 @@ static inline unsigned long mc146818_get_cmos_time(void)
} while (sec != CMOS_READ(RTC_SECONDS));
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
+ sec = bcd2bin(sec);
+ min = bcd2bin(min);
+ hour = bcd2bin(hour);
+ day = bcd2bin(day);
+ mon = bcd2bin(mon);
+ year = bcd2bin(year);
}
spin_unlock_irqrestore(&rtc_lock, flags);
year = mc146818_decode_year(year);
diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c
index 6537d90a25b..2d3c0dca275 100644
--- a/arch/mips/pmc-sierra/yosemite/setup.c
+++ b/arch/mips/pmc-sierra/yosemite/setup.c
@@ -79,14 +79,14 @@ unsigned long read_persistent_clock(void)
/* Stop the update to the time */
m48t37_base->control = 0x40;
- year = BCD2BIN(m48t37_base->year);
- year += BCD2BIN(m48t37_base->century) * 100;
+ year = bcd2bin(m48t37_base->year);
+ year += bcd2bin(m48t37_base->century) * 100;
- month = BCD2BIN(m48t37_base->month);
- day = BCD2BIN(m48t37_base->date);
- hour = BCD2BIN(m48t37_base->hour);
- min = BCD2BIN(m48t37_base->min);
- sec = BCD2BIN(m48t37_base->sec);
+ month = bcd2bin(m48t37_base->month);
+ day = bcd2bin(m48t37_base->date);
+ hour = bcd2bin(m48t37_base->hour);
+ min = bcd2bin(m48t37_base->min);
+ sec = bcd2bin(m48t37_base->sec);
/* Start the update to the time again */
m48t37_base->control = 0x00;
@@ -113,22 +113,22 @@ int rtc_mips_set_time(unsigned long tim)
m48t37_base->control = 0x80;
/* year */
- m48t37_base->year = BIN2BCD(tm.tm_year % 100);
- m48t37_base->century = BIN2BCD(tm.tm_year / 100);
+ m48t37_base->year = bin2bcd(tm.tm_year % 100);
+ m48t37_base->century = bin2bcd(tm.tm_year / 100);
/* month */
- m48t37_base->month = BIN2BCD(tm.tm_mon);
+ m48t37_base->month = bin2bcd(tm.tm_mon);
/* day */
- m48t37_base->date = BIN2BCD(tm.tm_mday);
+ m48t37_base->date = bin2bcd(tm.tm_mday);
/* hour/min/sec */
- m48t37_base->hour = BIN2BCD(tm.tm_hour);
- m48t37_base->min = BIN2BCD(tm.tm_min);
- m48t37_base->sec = BIN2BCD(tm.tm_sec);
+ m48t37_base->hour = bin2bcd(tm.tm_hour);
+ m48t37_base->min = bin2bcd(tm.tm_min);
+ m48t37_base->sec = bin2bcd(tm.tm_sec);
/* day of week -- not really used, but let's keep it up-to-date */
- m48t37_base->day = BIN2BCD(tm.tm_wday + 1);
+ m48t37_base->day = bin2bcd(tm.tm_wday + 1);
/* disable writing */
m48t37_base->control = 0x00;
diff --git a/arch/mips/sibyte/swarm/rtc_m41t81.c b/arch/mips/sibyte/swarm/rtc_m41t81.c
index 26fbff4c15b..b732600b47f 100644
--- a/arch/mips/sibyte/swarm/rtc_m41t81.c
+++ b/arch/mips/sibyte/swarm/rtc_m41t81.c
@@ -156,32 +156,32 @@ int m41t81_set_time(unsigned long t)
*/
spin_lock_irqsave(&rtc_lock, flags);
- tm.tm_sec = BIN2BCD(tm.tm_sec);
+ tm.tm_sec = bin2bcd(tm.tm_sec);
m41t81_write(M41T81REG_SC, tm.tm_sec);
- tm.tm_min = BIN2BCD(tm.tm_min);
+ tm.tm_min = bin2bcd(tm.tm_min);
m41t81_write(M41T81REG_MN, tm.tm_min);
- tm.tm_hour = BIN2BCD(tm.tm_hour);
+ tm.tm_hour = bin2bcd(tm.tm_hour);
tm.tm_hour = (tm.tm_hour & 0x3f) | (m41t81_read(M41T81REG_HR) & 0xc0);
m41t81_write(M41T81REG_HR, tm.tm_hour);
/* tm_wday starts from 0 to 6 */
if (tm.tm_wday == 0) tm.tm_wday = 7;
- tm.tm_wday = BIN2BCD(tm.tm_wday);
+ tm.tm_wday = bin2bcd(tm.tm_wday);
m41t81_write(M41T81REG_DY, tm.tm_wday);
- tm.tm_mday = BIN2BCD(tm.tm_mday);
+ tm.tm_mday = bin2bcd(tm.tm_mday);
m41t81_write(M41T81REG_DT, tm.tm_mday);
/* tm_mon starts from 0, *ick* */
tm.tm_mon ++;
- tm.tm_mon = BIN2BCD(tm.tm_mon);
+ tm.tm_mon = bin2bcd(tm.tm_mon);
m41t81_write(M41T81REG_MO, tm.tm_mon);
/* we don't do century, everything is beyond 2000 */
tm.tm_year %= 100;
- tm.tm_year = BIN2BCD(tm.tm_year);
+ tm.tm_year = bin2bcd(tm.tm_year);
m41t81_write(M41T81REG_YR, tm.tm_year);
spin_unlock_irqrestore(&rtc_lock, flags);
@@ -209,12 +209,12 @@ unsigned long m41t81_get_time(void)
year = m41t81_read(M41T81REG_YR);
spin_unlock_irqrestore(&rtc_lock, flags);
- sec = BCD2BIN(sec);
- min = BCD2BIN(min);
- hour = BCD2BIN(hour);
- day = BCD2BIN(day);
- mon = BCD2BIN(mon);
- year = BCD2BIN(year);
+ sec = bcd2bin(sec);
+ min = bcd2bin(min);
+ hour = bcd2bin(hour);
+ day = bcd2bin(day);
+ mon = bcd2bin(mon);
+ year = bcd2bin(year);
year += 2000;
diff --git a/arch/mips/sibyte/swarm/rtc_xicor1241.c b/arch/mips/sibyte/swarm/rtc_xicor1241.c
index ff3e5dabb34..4438b2195c4 100644
--- a/arch/mips/sibyte/swarm/rtc_xicor1241.c
+++ b/arch/mips/sibyte/swarm/rtc_xicor1241.c
@@ -124,18 +124,18 @@ int xicor_set_time(unsigned long t)
xicor_write(X1241REG_SR, X1241REG_SR_WEL | X1241REG_SR_RWEL);
/* trivial ones */
- tm.tm_sec = BIN2BCD(tm.tm_sec);
+ tm.tm_sec = bin2bcd(tm.tm_sec);
xicor_write(X1241REG_SC, tm.tm_sec);
- tm.tm_min = BIN2BCD(tm.tm_min);
+ tm.tm_min = bin2bcd(tm.tm_min);
xicor_write(X1241REG_MN, tm.tm_min);
- tm.tm_mday = BIN2BCD(tm.tm_mday);
+ tm.tm_mday = bin2bcd(tm.tm_mday);
xicor_write(X1241REG_DT, tm.tm_mday);
/* tm_mon starts from 0, *ick* */
tm.tm_mon ++;
- tm.tm_mon = BIN2BCD(tm.tm_mon);
+ tm.tm_mon = bin2bcd(tm.tm_mon);
xicor_write(X1241REG_MO, tm.tm_mon);
/* year is split */
@@ -148,7 +148,7 @@ int xicor_set_time(unsigned long t)
tmp = xicor_read(X1241REG_HR);
if (tmp & X1241REG_HR_MIL) {
/* 24 hour format */
- tm.tm_hour = BIN2BCD(tm.tm_hour);
+ tm.tm_hour = bin2bcd(tm.tm_hour);
tmp = (tmp & ~0x3f) | (tm.tm_hour & 0x3f);
} else {
/* 12 hour format, with 0x2 for pm */
@@ -157,7 +157,7 @@ int xicor_set_time(unsigned long t)
tmp |= 0x20;
tm.tm_hour -= 12;
}
- tm.tm_hour = BIN2BCD(tm.tm_hour);
+ tm.tm_hour = bin2bcd(tm.tm_hour);
tmp |= tm.tm_hour;
}
xicor_write(X1241REG_HR, tmp);
@@ -191,13 +191,13 @@ unsigned long xicor_get_time(void)
y2k = xicor_read(X1241REG_Y2K);
spin_unlock_irqrestore(&rtc_lock, flags);
- sec = BCD2BIN(sec);
- min = BCD2BIN(min);
- hour = BCD2BIN(hour);
- day = BCD2BIN(day);
- mon = BCD2BIN(mon);
- year = BCD2BIN(year);
- y2k = BCD2BIN(y2k);
+ sec = bcd2bin(sec);
+ min = bcd2bin(min);
+ hour = bcd2bin(hour);
+ day = bcd2bin(day);
+ mon = bcd2bin(mon);
+ year = bcd2bin(year);
+ y2k = bcd2bin(y2k);
year += (y2k * 100);
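
Note that two macro families are being retired in these RTC hunks: the expression-style BCD2BIN()/BIN2BCD() and the in-place BCD_TO_BIN()/BIN_TO_BCD(). The replacement bcd2bin()/bin2bcd() helpers return the converted value, which is why every former in-place user gains an explicit assignment. A minimal sketch of what the helpers compute, assuming the include/linux/bcd.h definitions of this era:

    /* sketch: BCD stores one decimal digit per nibble */
    unsigned bcd2bin(unsigned char val)
    {
            return (val & 0x0f) + (val >> 4) * 10;  /* ones plus tens */
    }

    unsigned char bin2bcd(unsigned val)
    {
            return ((val / 10) << 4) + val % 10;    /* tens into the high nibble */
    }
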
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index dd557c9cf00..9a9f4335887 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -68,6 +68,8 @@ mainmenu "Matsushita MN10300/AM33 Kernel Configuration"
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "Matsushita MN10300 system setup"
diff --git a/arch/mn10300/kernel/rtc.c b/arch/mn10300/kernel/rtc.c
index 042f792d843..7978470b574 100644
--- a/arch/mn10300/kernel/rtc.c
+++ b/arch/mn10300/kernel/rtc.c
@@ -67,7 +67,7 @@ static int set_rtc_mmss(unsigned long nowtime)
cmos_minutes = CMOS_READ(RTC_MINUTES);
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- BCD_TO_BIN(cmos_minutes);
+ cmos_minutes = bcd2bin(cmos_minutes);
/*
* since we're only adjusting minutes and seconds,
@@ -84,8 +84,8 @@ static int set_rtc_mmss(unsigned long nowtime)
if (abs(real_minutes - cmos_minutes) < 30) {
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- BIN_TO_BCD(real_seconds);
- BIN_TO_BCD(real_minutes);
+ real_seconds = bin2bcd(real_seconds);
+ real_minutes = bin2bcd(real_minutes);
}
CMOS_WRITE(real_seconds, RTC_SECONDS);
CMOS_WRITE(real_minutes, RTC_MINUTES);
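
The guard wrapped around these conversions is the standard CMOS idiom: the RTC may run in binary mode (RTC_DM_BINARY set in the control register), and some platforms are hardwired BCD (RTC_ALWAYS_BCD). A minimal sketch of the read side, essentially what the cmos_bcd_read() helper in the acpi/sleep hunk further below does; cmos_field() is a name made up for illustration:

    /* sketch: decode a CMOS field only when the clock stores BCD */
    static unsigned char cmos_field(int reg, unsigned char ctrl)
    {
            unsigned char val = CMOS_READ(reg);

            if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
                    val = bcd2bin(val);     /* BCD mode, or platform forces BCD */
            return val;
    }
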
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 8313fccced5..2bd1f6ef5db 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -90,6 +90,8 @@ config ARCH_MAY_HAVE_PC_FDC
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "Processor type and features"
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 380baa1780e..9391199d9e7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -230,6 +230,8 @@ config PPC_OF_PLATFORM_PCI
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
source "arch/powerpc/sysdev/Kconfig"
source "arch/powerpc/platforms/Kconfig"
diff --git a/arch/powerpc/include/asm/ps3av.h b/arch/powerpc/include/asm/ps3av.h
index fda98715cd3..5aa22cffdbd 100644
--- a/arch/powerpc/include/asm/ps3av.h
+++ b/arch/powerpc/include/asm/ps3av.h
@@ -678,6 +678,8 @@ struct ps3av_pkt_avb_param {
u8 buf[PS3AV_PKT_AVB_PARAM_MAX_BUF_SIZE];
};
+/* channel status */
+extern u8 ps3av_mode_cs_info[];
/** command status **/
#define PS3AV_STATUS_SUCCESS 0x0000 /* success */
@@ -735,6 +737,7 @@ extern int ps3av_get_mode(void);
extern int ps3av_video_mode2res(u32, u32 *, u32 *);
extern int ps3av_video_mute(int);
extern int ps3av_audio_mute(int);
+extern int ps3av_audio_mute_analog(int);
extern int ps3av_dev_open(void);
extern int ps3av_dev_close(void);
extern void ps3av_register_flip_ctl(void (*flip_ctl)(int on, void *data),
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index a323c9b32ee..97e05637972 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -27,6 +27,9 @@
#define DBG(fmt...)
#endif
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
void __init reserve_kdump_trampoline(void)
{
lmb_reserve(0, KDUMP_RESERVE_LIMIT);
@@ -66,7 +69,11 @@ void __init setup_kdump_trampoline(void)
DBG(" <- setup_kdump_trampoline()\n");
}
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
static int __init parse_elfcorehdr(char *p)
{
if (p)
@@ -75,7 +82,6 @@ static int __init parse_elfcorehdr(char *p)
return 1;
}
__setup("elfcorehdr=", parse_elfcorehdr);
-#endif
static int __init parse_savemaxmem(char *p)
{
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 98d7bf99533..b9e1a1da6e5 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -134,23 +134,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
return __add_pages(zone, start_pfn, nr_pages);
}
-
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
- unsigned long start_pfn, end_pfn;
- int ret;
-
- start_pfn = start >> PAGE_SHIFT;
- end_pfn = start_pfn + (size >> PAGE_SHIFT);
- ret = offline_pages(start_pfn, end_pfn, 120 * HZ);
- if (ret)
- goto out;
- /* Arch-specific calls go here - next patch */
-out:
- return ret;
-}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_MEMORY_HOTPLUG */
/*
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index bc581d8a7cd..70b7645ce74 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -78,6 +78,8 @@ config S390
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "Base setup"
comment "Processor type and features"
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index ea40a9d690f..de3fad60c68 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -99,6 +99,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_31BIT 18 /* 32bit process */
#define TIF_MEMDIE 19
#define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */
+#define TIF_FREEZE 21 /* thread is freezing for suspend */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
@@ -112,6 +113,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_USEDFPU (1<<TIF_USEDFPU)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_31BIT (1<<TIF_31BIT)
+#define _TIF_FREEZE (1<<TIF_FREEZE)
#endif /* __KERNEL__ */
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 1169130a97e..158b0d6d704 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -189,14 +189,3 @@ int arch_add_memory(int nid, u64 start, u64 size)
return rc;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
-
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
- unsigned long start_pfn, end_pfn;
-
- start_pfn = PFN_DOWN(start);
- end_pfn = start_pfn + PFN_DOWN(size);
- return offline_pages(start_pfn, end_pfn, 120 * HZ);
-}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
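
The powerpc and s390 remove_memory() bodies deleted above were near-identical wrappers around offline_pages(), so they can collapse into one generic copy in core mm. A hedged sketch of what the consolidated version reduces to (placement in mm/memory_hotplug.c is assumed):

    /* sketch: the arch-neutral hot-remove entry point */
    int remove_memory(u64 start, u64 size)
    {
            unsigned long start_pfn = start >> PAGE_SHIFT;
            unsigned long end_pfn = start_pfn + (size >> PAGE_SHIFT);

            /* allow up to 120s for page migration before giving up */
            return offline_pages(start_pfn, end_pfn, 120 * HZ);
    }
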
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b4aa2a03e19..cb2c87df70c 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -121,6 +121,8 @@ config IO_TRAPPED
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "System type"
#
diff --git a/arch/sh/kernel/crash_dump.c b/arch/sh/kernel/crash_dump.c
index 4a2ecbe27d8..95d21625556 100644
--- a/arch/sh/kernel/crash_dump.c
+++ b/arch/sh/kernel/crash_dump.c
@@ -10,6 +10,9 @@
#include <linux/io.h>
#include <asm/uaccess.h>
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
/**
* copy_oldmem_page - copy one page from "oldmem"
* @pfn: page frame number to be copied
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 97671dac12a..e594559c8db 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -37,6 +37,8 @@ config HZ
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "General machine setup"
config SMP
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index 29899fd5b1b..80fe547c3f4 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -135,6 +135,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
#define TIF_POLLING_NRFLAG 9 /* true if poll_idle() is polling
* TIF_NEED_RESCHED */
#define TIF_MEMDIE 10
+#define TIF_FREEZE 11 /* is freezing for suspend */
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -148,6 +149,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
#define _TIF_DO_NOTIFY_RESUME_MASK (_TIF_NOTIFY_RESUME | \
_TIF_SIGPENDING | \
_TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE (1<<TIF_FREEZE)
#endif /* __KERNEL__ */
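
The TIF_FREEZE / _TIF_FREEZE definitions being added per architecture are the flag the suspend freezer uses to signal a task. A minimal sketch of the accessors, assuming the include/linux/freezer.h helpers of this period:

    /* sketch: the freezer sets the flag; the task notices it on its way
     * back to userspace and parks itself until thawed */
    static inline int freezing(struct task_struct *p)
    {
            return test_tsk_thread_flag(p, TIF_FREEZE);
    }

    static inline void set_freeze_flag(struct task_struct *p)
    {
            set_tsk_thread_flag(p, TIF_FREEZE);
    }
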
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index c0a737d7292..639ac805448 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -237,6 +237,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
#define TIF_ABI_PENDING 12
#define TIF_MEMDIE 13
#define TIF_POLLING_NRFLAG 14
+#define TIF_FREEZE 15 /* is freezing for suspend */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
@@ -249,6 +250,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE (1<<TIF_FREEZE)
#define _TIF_USER_WORK_MASK ((0xff << TI_FLAG_WSAVED_SHIFT) | \
_TIF_DO_NOTIFY_RESUME_MASK | \
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 5446e2a499b..035b15af90d 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -96,6 +96,7 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
def_bool y
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
menu "Processor type and features"
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 6976812cfb1..393bccfe178 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -229,6 +229,8 @@ endmenu
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
source "drivers/block/Kconfig"
source "arch/um/Kconfig.char"
diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
index fd0c25ad6af..129647375a6 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/um/sys-i386/signal.c
@@ -179,7 +179,8 @@ static int copy_sc_from_user(struct pt_regs *regs,
if (have_fpx_regs) {
struct user_fxsr_struct fpx;
- err = copy_from_user(&fpx, &sc.fpstate->_fxsr_env[0],
+ err = copy_from_user(&fpx,
+ &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0],
sizeof(struct user_fxsr_struct));
if (err)
return 1;
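
The um change is an annotation fix rather than a behavioral one: sc.fpstate is held as a plain pointer, so it has to be cast back to a __user pointer before a member address derived from it may be passed to copy_from_user(). The shape of the idiom, with a hypothetical local for clarity:

    /* sketch: user memory is only dereferenced through a __user pointer;
     * "fp" is an illustrative local, not in the patch itself */
    struct _fpstate __user *fp = (struct _fpstate __user *)sc.fpstate;

    if (copy_from_user(&fpx, &fp->_fxsr_env[0], sizeof(fpx)))
            return 1;       /* fault while copying from userspace */
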
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bd3c2c53873..49349ba77d8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -193,6 +193,7 @@ config X86_TRAMPOLINE
config KTIME_SCALAR
def_bool X86_32
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
menu "Processor type and features"
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 72d0c56c1b4..f7cdb3b457a 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -13,6 +13,9 @@
static void *kdump_buf_page;
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
/**
* copy_oldmem_page - copy one page from "oldmem"
* @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index e90a60ef10c..045b36cada6 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -10,6 +10,9 @@
#include <linux/uaccess.h>
#include <linux/io.h>
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
/**
* copy_oldmem_page - copy one page from "oldmem"
* @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 0a23b5795b2..dd6f2b71561 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -52,7 +52,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
cmos_minutes = CMOS_READ(RTC_MINUTES);
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- BCD_TO_BIN(cmos_minutes);
+ cmos_minutes = bcd2bin(cmos_minutes);
/*
* since we're only adjusting minutes and seconds,
@@ -69,8 +69,8 @@ int mach_set_rtc_mmss(unsigned long nowtime)
if (abs(real_minutes - cmos_minutes) < 30) {
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- BIN_TO_BCD(real_seconds);
- BIN_TO_BCD(real_minutes);
+ real_seconds = bin2bcd(real_seconds);
+ real_minutes = bin2bcd(real_minutes);
}
CMOS_WRITE(real_seconds,RTC_SECONDS);
CMOS_WRITE(real_minutes,RTC_MINUTES);
@@ -124,16 +124,16 @@ unsigned long mach_get_cmos_time(void)
WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY));
if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) {
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
+ sec = bcd2bin(sec);
+ min = bcd2bin(min);
+ hour = bcd2bin(hour);
+ day = bcd2bin(day);
+ mon = bcd2bin(mon);
+ year = bcd2bin(year);
}
if (century) {
- BCD_TO_BIN(century);
+ century = bcd2bin(century);
year += century * 100;
printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
} else
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2255782e8d4..b2c97874ec0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -561,7 +561,13 @@ static void __init reserve_standard_io_resources(void)
}
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+
+#ifdef CONFIG_CRASH_DUMP
/* elfcorehdr= specifies the location of elf core header
* stored by the crashed kernel. This option will be passed
* by kexec loader to the capture kernel.
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a9ec89c3fbc..407d8784f66 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -792,6 +792,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
/* Must avoid aliasing mappings in the highmem code */
kmap_flush_unused();
+ vm_unmap_aliases();
+
cpa.vaddr = addr;
cpa.numpages = numpages;
cpa.mask_set = mask_set;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0013a729b41..b61534c7a4c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -871,6 +871,7 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
/* make sure there are no stray mappings of
this page */
kmap_flush_unused();
+ vm_unmap_aliases();
}
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ae173f6edd8..d4d52f5a1cf 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -846,6 +846,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
/* re-enable interrupts for kmap_flush_unused */
xen_mc_issue(0);
kmap_flush_unused();
+ vm_unmap_aliases();
xen_mc_batch();
}
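
All three vm_unmap_aliases() calls added above have the same rationale: the lazy vmap layer may defer TLB flushes and so keep stale kernel aliases of a page alive, and those aliases must be gone before page attributes change or before Xen pins a pagetable read-only. The resulting pairing:

    /* sketch: purge every leftover mapping of the page first */
    kmap_flush_unused();    /* drop unused highmem kmaps */
    vm_unmap_aliases();     /* flush lazily-unmapped vmap aliases */
    /* ...only now is it safe to rewrite the page's protections... */
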
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 02e417d3d8e..a213260b51e 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -55,6 +55,7 @@ config HZ
default 100
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
menu "Processor type and features"
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index b1c723f9f58..70f7f60929c 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -431,7 +431,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev,
}
static struct device_attribute alarm_attr = {
- .attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+ .attr = {.name = "alarm", .mode = 0644},
.show = acpi_battery_alarm_show,
.store = acpi_battery_alarm_store,
};
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 10a36512647..7b011e7e29f 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -463,7 +463,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev,
}
static struct device_attribute alarm_attr = {
- .attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+ .attr = {.name = "alarm", .mode = 0644},
.show = acpi_battery_alarm_show,
.store = acpi_battery_alarm_store,
};
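
The ".owner = THIS_MODULE" removals in this and the following hunks all stem from the same cleanup: sysfs now handles module references itself, so struct attribute lost its owner field and initializers shrink accordingly:

    /* sketch with placeholder names; note the absent .owner */
    static struct device_attribute foo_attr = {
            .attr = { .name = "foo", .mode = 0644 },
            .show = foo_show,
            .store = foo_store,
    };
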
diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c
index bf5b04de02d..631ee2ee2ca 100644
--- a/drivers/acpi/sleep/proc.c
+++ b/drivers/acpi/sleep/proc.c
@@ -120,13 +120,13 @@ static int acpi_system_alarm_seq_show(struct seq_file *seq, void *offset)
spin_unlock_irqrestore(&rtc_lock, flags);
if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hr);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mo);
- BCD_TO_BIN(yr);
- BCD_TO_BIN(cent);
+ sec = bcd2bin(sec);
+ min = bcd2bin(min);
+ hr = bcd2bin(hr);
+ day = bcd2bin(day);
+ mo = bcd2bin(mo);
+ yr = bcd2bin(yr);
+ cent = bcd2bin(cent);
}
/* we're trusting the FADT (see above) */
@@ -204,7 +204,7 @@ static u32 cmos_bcd_read(int offset, int rtc_control)
{
u32 val = CMOS_READ(offset);
if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- BCD_TO_BIN(val);
+ val = bcd2bin(val);
return val;
}
@@ -212,7 +212,7 @@ static u32 cmos_bcd_read(int offset, int rtc_control)
static void cmos_bcd_write(u32 val, int offset, int rtc_control)
{
if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- BIN_TO_BCD(val);
+ val = bin2bcd(val);
CMOS_WRITE(val, offset);
}
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index 91dec448b3e..24e80fd927e 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -115,7 +115,6 @@ static void acpi_table_attr_init(struct acpi_table_attr *table_attr,
table_attr->attr.read = acpi_table_show;
table_attr->attr.attr.name = table_attr->name;
table_attr->attr.attr.mode = 0444;
- table_attr->attr.attr.owner = THIS_MODULE;
return;
}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index af0d175c025..5260e9e0df4 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -21,6 +21,8 @@
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/stat.h>
+
#include <asm/atomic.h>
#include <asm/uaccess.h>
@@ -325,7 +327,7 @@ memory_probe_store(struct class *class, const char *buf, size_t count)
return count;
}
-static CLASS_ATTR(probe, 0700, NULL, memory_probe_store);
+static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
static int memory_probe_init(void)
{
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 5116b78c632..f5207090885 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -13,6 +13,7 @@
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/device.h>
+#include <linux/swap.h>
static struct sysdev_class node_class = {
.name = "node",
@@ -61,34 +62,52 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
si_meminfo_node(&i, nid);
n = sprintf(buf, "\n"
- "Node %d MemTotal: %8lu kB\n"
- "Node %d MemFree: %8lu kB\n"
- "Node %d MemUsed: %8lu kB\n"
- "Node %d Active: %8lu kB\n"
- "Node %d Inactive: %8lu kB\n"
+ "Node %d MemTotal: %8lu kB\n"
+ "Node %d MemFree: %8lu kB\n"
+ "Node %d MemUsed: %8lu kB\n"
+ "Node %d Active: %8lu kB\n"
+ "Node %d Inactive: %8lu kB\n"
+ "Node %d Active(anon): %8lu kB\n"
+ "Node %d Inactive(anon): %8lu kB\n"
+ "Node %d Active(file): %8lu kB\n"
+ "Node %d Inactive(file): %8lu kB\n"
+#ifdef CONFIG_UNEVICTABLE_LRU
+ "Node %d Unevictable: %8lu kB\n"
+ "Node %d Mlocked: %8lu kB\n"
+#endif
#ifdef CONFIG_HIGHMEM
- "Node %d HighTotal: %8lu kB\n"
- "Node %d HighFree: %8lu kB\n"
- "Node %d LowTotal: %8lu kB\n"
- "Node %d LowFree: %8lu kB\n"
+ "Node %d HighTotal: %8lu kB\n"
+ "Node %d HighFree: %8lu kB\n"
+ "Node %d LowTotal: %8lu kB\n"
+ "Node %d LowFree: %8lu kB\n"
#endif
- "Node %d Dirty: %8lu kB\n"
- "Node %d Writeback: %8lu kB\n"
- "Node %d FilePages: %8lu kB\n"
- "Node %d Mapped: %8lu kB\n"
- "Node %d AnonPages: %8lu kB\n"
- "Node %d PageTables: %8lu kB\n"
- "Node %d NFS_Unstable: %8lu kB\n"
- "Node %d Bounce: %8lu kB\n"
- "Node %d WritebackTmp: %8lu kB\n"
- "Node %d Slab: %8lu kB\n"
- "Node %d SReclaimable: %8lu kB\n"
- "Node %d SUnreclaim: %8lu kB\n",
+ "Node %d Dirty: %8lu kB\n"
+ "Node %d Writeback: %8lu kB\n"
+ "Node %d FilePages: %8lu kB\n"
+ "Node %d Mapped: %8lu kB\n"
+ "Node %d AnonPages: %8lu kB\n"
+ "Node %d PageTables: %8lu kB\n"
+ "Node %d NFS_Unstable: %8lu kB\n"
+ "Node %d Bounce: %8lu kB\n"
+ "Node %d WritebackTmp: %8lu kB\n"
+ "Node %d Slab: %8lu kB\n"
+ "Node %d SReclaimable: %8lu kB\n"
+ "Node %d SUnreclaim: %8lu kB\n",
nid, K(i.totalram),
nid, K(i.freeram),
nid, K(i.totalram - i.freeram),
- nid, K(node_page_state(nid, NR_ACTIVE)),
- nid, K(node_page_state(nid, NR_INACTIVE)),
+ nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
+ node_page_state(nid, NR_ACTIVE_FILE)),
+ nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
+ node_page_state(nid, NR_INACTIVE_FILE)),
+ nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
+ nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
+ nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
+ nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+ nid, K(node_page_state(nid, NR_UNEVICTABLE)),
+ nid, K(node_page_state(nid, NR_MLOCK)),
+#endif
#ifdef CONFIG_HIGHMEM
nid, K(i.totalhigh),
nid, K(i.freehigh),
@@ -173,6 +192,8 @@ int register_node(struct node *node, int num, struct node *parent)
sysdev_create_file(&node->sysdev, &attr_meminfo);
sysdev_create_file(&node->sysdev, &attr_numastat);
sysdev_create_file(&node->sysdev, &attr_distance);
+
+ scan_unevictable_register_node(node);
}
return error;
}
@@ -192,6 +213,8 @@ void unregister_node(struct node *node)
sysdev_remove_file(&node->sysdev, &attr_numastat);
sysdev_remove_file(&node->sysdev, &attr_distance);
+ scan_unevictable_unregister_node(node);
+
sysdev_unregister(&node->sysdev);
}
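
node_read_meminfo() reports page counts through the driver's K() macro, and the split-LRU rework keeps the combined Active/Inactive lines as sums of the new anon/file counters so existing parsers keep working. A sketch of the conversion, assuming the usual definition at the top of drivers/base/node.c:

    /* pages to kB: PAGE_SHIFT is log2(page size), so shifting by
     * (PAGE_SHIFT - 10) rescales a page count to 1 kB units */
    #define K(x) ((x) << (PAGE_SHIFT - 10))
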
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index b82654e883a..d876ad86123 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -90,7 +90,7 @@ static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
static struct device_attribute dev_attr_firmware_version = {
- .attr = { .name = "firmware-version", .mode = S_IRUGO, .owner = THIS_MODULE },
+ .attr = { .name = "firmware-version", .mode = S_IRUGO },
.show = aoedisk_show_fwver,
};
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7b3351260d5..9034ca585af 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -391,7 +391,7 @@ static ssize_t pid_show(struct device *dev,
}
static struct device_attribute pid_attr = {
- .attr = { .name = "pid", .mode = S_IRUGO, .owner = THIS_MODULE },
+ .attr = { .name = "pid", .mode = S_IRUGO},
.show = pid_show,
};
diff --git a/drivers/char/ds1286.c b/drivers/char/ds1286.c
index 5329d482b58..0a826d7be10 100644
--- a/drivers/char/ds1286.c
+++ b/drivers/char/ds1286.c
@@ -210,8 +210,8 @@ static int ds1286_ioctl(struct inode *inode, struct file *file,
if (sec != 0)
return -EINVAL;
- min = BIN2BCD(min);
- min = BIN2BCD(hrs);
+ min = bin2bcd(min);
+	hrs = bin2bcd(hrs);	/* was "min = bin2bcd(hrs)": hrs itself must be converted */
spin_lock(&ds1286_lock);
rtc_write(hrs, RTC_HOURS_ALARM);
@@ -353,7 +353,7 @@ static int ds1286_proc_output(char *buf)
ds1286_get_time(&tm);
hundredth = rtc_read(RTC_HUNDREDTH_SECOND);
- BCD_TO_BIN(hundredth);
+ hundredth = bcd2bin(hundredth);
p += sprintf(p,
"rtc_time\t: %02d:%02d:%02d.%02d\n"
@@ -477,12 +477,12 @@ static void ds1286_get_time(struct rtc_time *rtc_tm)
rtc_write(save_control, RTC_CMD);
spin_unlock_irqrestore(&ds1286_lock, flags);
- BCD_TO_BIN(rtc_tm->tm_sec);
- BCD_TO_BIN(rtc_tm->tm_min);
- BCD_TO_BIN(rtc_tm->tm_hour);
- BCD_TO_BIN(rtc_tm->tm_mday);
- BCD_TO_BIN(rtc_tm->tm_mon);
- BCD_TO_BIN(rtc_tm->tm_year);
+ rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+ rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+ rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+ rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+ rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+ rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
/*
* Account for differences between how the RTC uses the values
@@ -531,12 +531,12 @@ static int ds1286_set_time(struct rtc_time *rtc_tm)
if (yrs >= 100)
yrs -= 100;
- BIN_TO_BCD(sec);
- BIN_TO_BCD(min);
- BIN_TO_BCD(hrs);
- BIN_TO_BCD(day);
- BIN_TO_BCD(mon);
- BIN_TO_BCD(yrs);
+ sec = bin2bcd(sec);
+ min = bin2bcd(min);
+ hrs = bin2bcd(hrs);
+ day = bin2bcd(day);
+ mon = bin2bcd(mon);
+ yrs = bin2bcd(yrs);
spin_lock_irqsave(&ds1286_lock, flags);
save_control = rtc_read(RTC_CMD);
@@ -572,8 +572,8 @@ static void ds1286_get_alm_time(struct rtc_time *alm_tm)
cmd = rtc_read(RTC_CMD);
spin_unlock_irqrestore(&ds1286_lock, flags);
- BCD_TO_BIN(alm_tm->tm_min);
- BCD_TO_BIN(alm_tm->tm_hour);
+ alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
+ alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
alm_tm->tm_sec = 0;
}
diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c
index c5e67a62395..170693c93c7 100644
--- a/drivers/char/ds1302.c
+++ b/drivers/char/ds1302.c
@@ -131,12 +131,12 @@ get_rtc_time(struct rtc_time *rtc_tm)
local_irq_restore(flags);
- BCD_TO_BIN(rtc_tm->tm_sec);
- BCD_TO_BIN(rtc_tm->tm_min);
- BCD_TO_BIN(rtc_tm->tm_hour);
- BCD_TO_BIN(rtc_tm->tm_mday);
- BCD_TO_BIN(rtc_tm->tm_mon);
- BCD_TO_BIN(rtc_tm->tm_year);
+ rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+ rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+ rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+ rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+ rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+ rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
/*
* Account for differences between how the RTC uses the values
@@ -211,12 +211,12 @@ static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
else
yrs -= 1900; /* RTC (70, 71, ... 99) */
- BIN_TO_BCD(sec);
- BIN_TO_BCD(min);
- BIN_TO_BCD(hrs);
- BIN_TO_BCD(day);
- BIN_TO_BCD(mon);
- BIN_TO_BCD(yrs);
+ sec = bin2bcd(sec);
+ min = bin2bcd(min);
+ hrs = bin2bcd(hrs);
+ day = bin2bcd(day);
+ mon = bin2bcd(mon);
+ yrs = bin2bcd(yrs);
lock_kernel();
local_irq_save(flags);
diff --git a/drivers/char/ip27-rtc.c b/drivers/char/ip27-rtc.c
index ec9d0443d92..2abd881b4cb 100644
--- a/drivers/char/ip27-rtc.c
+++ b/drivers/char/ip27-rtc.c
@@ -130,12 +130,12 @@ static long rtc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (yrs >= 100)
yrs -= 100;
- sec = BIN2BCD(sec);
- min = BIN2BCD(min);
- hrs = BIN2BCD(hrs);
- day = BIN2BCD(day);
- mon = BIN2BCD(mon);
- yrs = BIN2BCD(yrs);
+ sec = bin2bcd(sec);
+ min = bin2bcd(min);
+ hrs = bin2bcd(hrs);
+ day = bin2bcd(day);
+ mon = bin2bcd(mon);
+ yrs = bin2bcd(yrs);
spin_lock_irq(&rtc_lock);
rtc->control |= M48T35_RTC_SET;
@@ -311,12 +311,12 @@ static void get_rtc_time(struct rtc_time *rtc_tm)
rtc->control &= ~M48T35_RTC_READ;
spin_unlock_irq(&rtc_lock);
- rtc_tm->tm_sec = BCD2BIN(rtc_tm->tm_sec);
- rtc_tm->tm_min = BCD2BIN(rtc_tm->tm_min);
- rtc_tm->tm_hour = BCD2BIN(rtc_tm->tm_hour);
- rtc_tm->tm_mday = BCD2BIN(rtc_tm->tm_mday);
- rtc_tm->tm_mon = BCD2BIN(rtc_tm->tm_mon);
- rtc_tm->tm_year = BCD2BIN(rtc_tm->tm_year);
+ rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+ rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+ rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+ rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+ rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+ rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
/*
* Account for differences between how the RTC uses the values
diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c
index b930de50407..3f7da8cf3a8 100644
--- a/drivers/char/pc8736x_gpio.c
+++ b/drivers/char/pc8736x_gpio.c
@@ -41,7 +41,8 @@ static u8 pc8736x_gpio_shadow[4];
#define SIO_BASE2 0x4E /* alt command-reg to check */
#define SIO_SID 0x20 /* SuperI/O ID Register */
-#define SIO_SID_VALUE 0xe9 /* Expected value in SuperI/O ID Register */
+#define SIO_SID_PC87365 0xe5 /* Expected value in ID Register for PC87365 */
+#define SIO_SID_PC87366 0xe9 /* Expected value in ID Register for PC87366 */
#define SIO_CF1 0x21 /* chip config, bit0 is chip enable */
@@ -91,13 +92,17 @@ static inline int superio_inb(int addr)
static int pc8736x_superio_present(void)
{
+ int id;
+
/* try the 2 possible values, read a hardware reg to verify */
superio_cmd = SIO_BASE1;
- if (superio_inb(SIO_SID) == SIO_SID_VALUE)
+ id = superio_inb(SIO_SID);
+ if (id == SIO_SID_PC87365 || id == SIO_SID_PC87366)
return superio_cmd;
superio_cmd = SIO_BASE2;
- if (superio_inb(SIO_SID) == SIO_SID_VALUE)
+ id = superio_inb(SIO_SID);
+ if (id == SIO_SID_PC87365 || id == SIO_SID_PC87366)
return superio_cmd;
return 0;
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 17683de9571..32dc89720d5 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -518,17 +518,17 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) ||
RTC_ALWAYS_BCD) {
if (sec < 60)
- BIN_TO_BCD(sec);
+ sec = bin2bcd(sec);
else
sec = 0xff;
if (min < 60)
- BIN_TO_BCD(min);
+ min = bin2bcd(min);
else
min = 0xff;
if (hrs < 24)
- BIN_TO_BCD(hrs);
+ hrs = bin2bcd(hrs);
else
hrs = 0xff;
}
@@ -614,12 +614,12 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
|| RTC_ALWAYS_BCD) {
- BIN_TO_BCD(sec);
- BIN_TO_BCD(min);
- BIN_TO_BCD(hrs);
- BIN_TO_BCD(day);
- BIN_TO_BCD(mon);
- BIN_TO_BCD(yrs);
+ sec = bin2bcd(sec);
+ min = bin2bcd(min);
+ hrs = bin2bcd(hrs);
+ day = bin2bcd(day);
+ mon = bin2bcd(mon);
+ yrs = bin2bcd(yrs);
}
save_control = CMOS_READ(RTC_CONTROL);
@@ -1099,7 +1099,7 @@ no_irq:
spin_unlock_irq(&rtc_lock);
if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- BCD_TO_BIN(year); /* This should never happen... */
+ year = bcd2bin(year); /* This should never happen... */
if (year < 20) {
epoch = 2000;
@@ -1352,13 +1352,13 @@ static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
spin_unlock_irqrestore(&rtc_lock, flags);
if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- BCD_TO_BIN(rtc_tm->tm_sec);
- BCD_TO_BIN(rtc_tm->tm_min);
- BCD_TO_BIN(rtc_tm->tm_hour);
- BCD_TO_BIN(rtc_tm->tm_mday);
- BCD_TO_BIN(rtc_tm->tm_mon);
- BCD_TO_BIN(rtc_tm->tm_year);
- BCD_TO_BIN(rtc_tm->tm_wday);
+ rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+ rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+ rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+ rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+ rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+ rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
+ rtc_tm->tm_wday = bcd2bin(rtc_tm->tm_wday);
}
#ifdef CONFIG_MACH_DECSTATION
@@ -1392,9 +1392,9 @@ static void get_rtc_alm_time(struct rtc_time *alm_tm)
spin_unlock_irq(&rtc_lock);
if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- BCD_TO_BIN(alm_tm->tm_sec);
- BCD_TO_BIN(alm_tm->tm_min);
- BCD_TO_BIN(alm_tm->tm_hour);
+ alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec);
+ alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
+ alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
}
}
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 5b8d7a1aa3e..ba4e86281fb 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -2504,7 +2504,7 @@ static void __devexit sx_remove_card(struct sx_board *board,
del_timer(&board->timer);
if (pdev) {
#ifdef CONFIG_PCI
- pci_iounmap(pdev, board->base2);
+ iounmap(board->base2);
pci_release_region(pdev, IS_CF_BOARD(board) ? 3 : 2);
#endif
} else {
@@ -2677,7 +2677,7 @@ static int __devinit sx_pci_probe(struct pci_dev *pdev,
}
board->hw_base = pci_resource_start(pdev, reg);
board->base2 =
- board->base = pci_iomap(pdev, reg, WINDOW_LEN(board));
+ board->base = ioremap_nocache(board->hw_base, WINDOW_LEN(board));
if (!board->base) {
dev_err(&pdev->dev, "ioremap failed\n");
goto err_reg;
@@ -2703,7 +2703,7 @@ static int __devinit sx_pci_probe(struct pci_dev *pdev,
return 0;
err_unmap:
- pci_iounmap(pdev, board->base2);
+ iounmap(board->base2);
err_reg:
pci_release_region(pdev, reg);
err_flag:
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index dce4cc0e695..d0c0d64ed36 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
static struct sysrq_key_op sysrq_show_timers_op = {
.handler = sysrq_handle_show_timers,
.help_msg = "show-all-timers(Q)",
- .action_msg = "Show Pending Timers",
+ .action_msg = "Show pending hrtimers (no others)",
};
static void sysrq_handle_mountro(int key, struct tty_struct *tty)
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index e70d13defde..9c47dc48c9f 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -1157,7 +1157,7 @@ EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
* Once all references to platform device are down to 0,
* release all allocated structures.
*/
-static void tpm_dev_release(struct device *dev)
+void tpm_dev_release(struct device *dev)
{
struct tpm_chip *chip = dev_get_drvdata(dev);
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index 0e024fe2d8c..887072f5dc8 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -142,7 +142,7 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
csrow->nr_pages = (r.end - r.start + 1) >> PAGE_SHIFT;
csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
csrow->mtype = MEM_XDR;
- csrow->edac_mode = EDAC_FLAG_EC | EDAC_FLAG_SECDED;
+ csrow->edac_mode = EDAC_SECDED;
dev_dbg(mci->dev,
"Initialized on node %d, chanmask=0x%x,"
" first_page=0x%lx, nr_pages=0x%x\n",
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index deb154aa47c..4353414a0b7 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -732,7 +732,6 @@ static int __init ibft_create_attribute(struct ibft_kobject *kobj_data,
attr->attr.name = name;
attr->attr.mode = S_IRUSR;
- attr->attr.owner = THIS_MODULE;
attr->hdr = hdr;
attr->show = show;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 9112830107a..22edc4273ef 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -248,7 +248,7 @@ static ssize_t gpio_value_show(struct device *dev,
if (!test_bit(FLAG_EXPORT, &desc->flags))
status = -EIO;
else
- status = sprintf(buf, "%d\n", gpio_get_value_cansleep(gpio));
+ status = sprintf(buf, "%d\n", !!gpio_get_value_cansleep(gpio));
mutex_unlock(&sysfs_lock);
return status;
@@ -1105,7 +1105,7 @@ int gpio_get_value_cansleep(unsigned gpio)
might_sleep_if(extra_checks);
chip = gpio_to_chip(gpio);
- return chip->get(chip, gpio - chip->base);
+ return chip->get ? chip->get(chip, gpio - chip->base) : 0;
}
EXPORT_SYMBOL_GPL(gpio_get_value_cansleep);
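
Both gpiolib changes harden the sysfs value path: chip->get may be NULL (for example on output-only expanders), and a driver's get() may return any non-zero value for "high". Combining the two fixes into one sketch:

    /* sketch: guard a missing getter, then fold "any non-zero" to 0 or 1 */
    int raw = chip->get ? chip->get(chip, gpio - chip->base) : 0;
    status = sprintf(buf, "%d\n", !!raw);
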
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index b06b8e090a2..bc011da79e1 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -49,6 +49,9 @@
#define APPLESMC_MAX_DATA_LENGTH 32
+#define APPLESMC_MIN_WAIT 0x0040
+#define APPLESMC_MAX_WAIT 0x8000
+
#define APPLESMC_STATUS_MASK 0x0f
#define APPLESMC_READ_CMD 0x10
#define APPLESMC_WRITE_CMD 0x11
@@ -57,8 +60,8 @@
#define KEY_COUNT_KEY "#KEY" /* r-o ui32 */
-#define LIGHT_SENSOR_LEFT_KEY "ALV0" /* r-o {alv (6 bytes) */
-#define LIGHT_SENSOR_RIGHT_KEY "ALV1" /* r-o {alv (6 bytes) */
+#define LIGHT_SENSOR_LEFT_KEY "ALV0" /* r-o {alv (6-10 bytes) */
+#define LIGHT_SENSOR_RIGHT_KEY "ALV1" /* r-o {alv (6-10 bytes) */
#define BACKLIGHT_KEY "LKSB" /* w-o {lkb (2 bytes) */
#define CLAMSHELL_KEY "MSLD" /* r-o ui8 (unused) */
@@ -104,6 +107,15 @@ static const char* temperature_sensors_sets[][36] = {
/* Set 6: Macbook3 set */
{ "TB0T", "TC0D", "TC0P", "TM0P", "TN0P", "TTF0", "TW0P", "Th0H",
"Th0S", "Th1H", NULL },
+/* Set 7: Macbook Air */
+ { "TB0T", "TB1S", "TB1T", "TB2S", "TB2T", "TC0D", "TC0P", "TCFP",
+ "TTF0", "TW0P", "Th0H", "Tp0P", "TpFP", "Ts0P", "Ts0S", NULL },
+/* Set 8: Macbook Pro 4,1 (Penryn) */
+ { "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P", "Th0H",
+ "Th1H", "Th2H", "Tm0P", "Ts0P", NULL },
+/* Set 9: Macbook Pro 3,1 (Santa Rosa) */
+ { "TALP", "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P",
+ "Th0H", "Th1H", "Th2H", "Tm0P", "Ts0P", NULL },
};
/* List of keys used to read/write fan speeds */
@@ -163,25 +175,25 @@ static unsigned int key_at_index;
static struct workqueue_struct *applesmc_led_wq;
/*
- * __wait_status - Wait up to 2ms for the status port to get a certain value
+ * __wait_status - Wait up to 32ms for the status port to get a certain value
* (masked with 0x0f), returning zero if the value is obtained. Callers must
* hold applesmc_lock.
*/
static int __wait_status(u8 val)
{
- unsigned int i;
+ int us;
val = val & APPLESMC_STATUS_MASK;
- for (i = 0; i < 200; i++) {
+ for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
+ udelay(us);
if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == val) {
if (debug)
printk(KERN_DEBUG
- "Waited %d us for status %x\n",
- i*10, val);
+ "Waited %d us for status %x\n",
+ 2 * us - APPLESMC_MIN_WAIT, val);
return 0;
}
- udelay(10);
}
printk(KERN_WARNING "applesmc: wait status failed: %x != %x\n",
@@ -191,6 +203,25 @@ static int __wait_status(u8 val)
}
/*
+ * special treatment of command port - on newer macbooks, it seems necessary
+ * to resend the command byte before polling the status again. Callers must
+ * hold applesmc_lock.
+ */
+static int send_command(u8 cmd)
+{
+ int us;
+ for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
+ outb(cmd, APPLESMC_CMD_PORT);
+ udelay(us);
+ if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == 0x0c)
+ return 0;
+ }
+ printk(KERN_WARNING "applesmc: command failed: %x -> %x\n",
+ cmd, inb(APPLESMC_CMD_PORT));
+ return -EIO;
+}
+
+/*
* applesmc_read_key - reads len bytes from a given key, and put them in buffer.
* Returns zero on success or a negative error on failure. Callers must
* hold applesmc_lock.
@@ -205,8 +236,7 @@ static int applesmc_read_key(const char* key, u8* buffer, u8 len)
return -EINVAL;
}
- outb(APPLESMC_READ_CMD, APPLESMC_CMD_PORT);
- if (__wait_status(0x0c))
+ if (send_command(APPLESMC_READ_CMD))
return -EIO;
for (i = 0; i < 4; i++) {
@@ -249,8 +279,7 @@ static int applesmc_write_key(const char* key, u8* buffer, u8 len)
return -EINVAL;
}
- outb(APPLESMC_WRITE_CMD, APPLESMC_CMD_PORT);
- if (__wait_status(0x0c))
+ if (send_command(APPLESMC_WRITE_CMD))
return -EIO;
for (i = 0; i < 4; i++) {
@@ -284,8 +313,7 @@ static int applesmc_get_key_at_index(int index, char* key)
readkey[2] = index >> 8;
readkey[3] = index;
- outb(APPLESMC_GET_KEY_BY_INDEX_CMD, APPLESMC_CMD_PORT);
- if (__wait_status(0x0c))
+ if (send_command(APPLESMC_GET_KEY_BY_INDEX_CMD))
return -EIO;
for (i = 0; i < 4; i++) {
@@ -315,8 +343,7 @@ static int applesmc_get_key_type(char* key, char* type)
{
int i;
- outb(APPLESMC_GET_KEY_TYPE_CMD, APPLESMC_CMD_PORT);
- if (__wait_status(0x0c))
+ if (send_command(APPLESMC_GET_KEY_TYPE_CMD))
return -EIO;
for (i = 0; i < 4; i++) {
@@ -325,7 +352,7 @@ static int applesmc_get_key_type(char* key, char* type)
return -EIO;
}
- outb(5, APPLESMC_DATA_PORT);
+ outb(6, APPLESMC_DATA_PORT);
for (i = 0; i < 6; i++) {
if (__wait_status(0x05))
@@ -527,17 +554,27 @@ out:
static ssize_t applesmc_light_show(struct device *dev,
struct device_attribute *attr, char *sysfsbuf)
{
+ static int data_length;
int ret;
u8 left = 0, right = 0;
- u8 buffer[6];
+ u8 buffer[10], query[6];
mutex_lock(&applesmc_lock);
- ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, 6);
+ if (!data_length) {
+ ret = applesmc_get_key_type(LIGHT_SENSOR_LEFT_KEY, query);
+ if (ret)
+ goto out;
+ data_length = clamp_val(query[0], 0, 10);
+ printk(KERN_INFO "applesmc: light sensor data length set to "
+ "%d\n", data_length);
+ }
+
+ ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length);
left = buffer[2];
if (ret)
goto out;
- ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, 6);
+ ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, data_length);
right = buffer[2];
out:
@@ -1233,39 +1270,57 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = {
{ .accelerometer = 0, .light = 0, .temperature_set = 5 },
/* MacBook3: accelerometer and temperature set 6 */
{ .accelerometer = 1, .light = 0, .temperature_set = 6 },
+/* MacBook Air: accelerometer, backlight and temperature set 7 */
+ { .accelerometer = 1, .light = 1, .temperature_set = 7 },
+/* MacBook Pro 4: accelerometer, backlight and temperature set 8 */
+ { .accelerometer = 1, .light = 1, .temperature_set = 8 },
+/* MacBook Pro 3: accelerometer, backlight and temperature set 9 */
+ { .accelerometer = 1, .light = 1, .temperature_set = 9 },
};
/* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1".
* So we need to put "Apple MacBook Pro" before "Apple MacBook". */
static __initdata struct dmi_system_id applesmc_whitelist[] = {
+ { applesmc_dmi_match, "Apple MacBook Air", {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") },
+ &applesmc_dmi_data[7]},
+ { applesmc_dmi_match, "Apple MacBook Pro 4", {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro4") },
+ &applesmc_dmi_data[8]},
+ { applesmc_dmi_match, "Apple MacBook Pro 3", {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro3") },
+ &applesmc_dmi_data[9]},
{ applesmc_dmi_match, "Apple MacBook Pro", {
DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro") },
- (void*)&applesmc_dmi_data[0]},
+ &applesmc_dmi_data[0]},
{ applesmc_dmi_match, "Apple MacBook (v2)", {
DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
DMI_MATCH(DMI_PRODUCT_NAME,"MacBook2") },
- (void*)&applesmc_dmi_data[1]},
+ &applesmc_dmi_data[1]},
{ applesmc_dmi_match, "Apple MacBook (v3)", {
DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
DMI_MATCH(DMI_PRODUCT_NAME,"MacBook3") },
- (void*)&applesmc_dmi_data[6]},
+ &applesmc_dmi_data[6]},
{ applesmc_dmi_match, "Apple MacBook", {
DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
DMI_MATCH(DMI_PRODUCT_NAME,"MacBook") },
- (void*)&applesmc_dmi_data[2]},
+ &applesmc_dmi_data[2]},
{ applesmc_dmi_match, "Apple Macmini", {
DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
DMI_MATCH(DMI_PRODUCT_NAME,"Macmini") },
- (void*)&applesmc_dmi_data[3]},
+ &applesmc_dmi_data[3]},
{ applesmc_dmi_match, "Apple MacPro2", {
DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
DMI_MATCH(DMI_PRODUCT_NAME,"MacPro2") },
- (void*)&applesmc_dmi_data[4]},
+ &applesmc_dmi_data[4]},
{ applesmc_dmi_match, "Apple iMac", {
DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
DMI_MATCH(DMI_PRODUCT_NAME,"iMac") },
- (void*)&applesmc_dmi_data[5]},
+ &applesmc_dmi_data[5]},
{ .ident = NULL }
};
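
send_command() and the reworked __wait_status() replace fixed 10 us polling with exponential backoff: the delay doubles from APPLESMC_MIN_WAIT (0x40 us) toward APPLESMC_MAX_WAIT (0x8000 us), roughly 32 ms in total, so slow SMCs eventually respond while fast ones exit within the first iterations. The skeleton of the wait loop:

    /* sketch: exponentially backed-off polling of the status port;
     * "val" is the expected status nibble, as in __wait_status() */
    int us;

    for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
            udelay(us);     /* 0x40, 0x80, ... 0x4000 microseconds */
            if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == val)
                    return 0;
    }
    return -EIO;            /* timed out */

send_command() follows the same pattern but re-sends the command byte with outb() before each delay, which the comment above notes newer machines seem to require.
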
diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 9b462bb13fa..5fbfa34c110 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -75,7 +75,8 @@ MODULE_PARM_DESC(force_id, "Override the detected device ID");
#define FSCM 0x09 /* Logical device: fans */
#define VLM 0x0d /* Logical device: voltages */
#define TMS 0x0e /* Logical device: temperatures */
-static const u8 logdev[3] = { FSCM, VLM, TMS };
+#define LDNI_MAX 3
+static const u8 logdev[LDNI_MAX] = { FSCM, VLM, TMS };
#define LD_FAN 0
#define LD_IN 1
@@ -489,11 +490,66 @@ static struct sensor_device_attribute in_max[] = {
SENSOR_ATTR(in10_max, S_IWUSR | S_IRUGO, show_in_max, set_in_max, 10),
};
+/* (temp & vin) channel status register alarm bits (pdf sec.11.5.12) */
+#define CHAN_ALM_MIN 0x02 /* min limit crossed */
+#define CHAN_ALM_MAX 0x04 /* max limit exceeded */
+#define TEMP_ALM_CRIT 0x08 /* temp crit exceeded (temp only) */
+
+/* show_in_min/max_alarm() reads data from the per-channel status
+ register (sec 11.5.12), not the vin event status registers (sec
+   11.5.2) that (legacy) show_in_alarm() reads (via data->in_alarms) */
+
+static ssize_t show_in_min_alarm(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct pc87360_data *data = pc87360_update_device(dev);
+ unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+ return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_in_max_alarm(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct pc87360_data *data = pc87360_update_device(dev);
+ unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+ return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MAX));
+}
+
+static struct sensor_device_attribute in_min_alarm[] = {
+ SENSOR_ATTR(in0_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 0),
+ SENSOR_ATTR(in1_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 1),
+ SENSOR_ATTR(in2_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 2),
+ SENSOR_ATTR(in3_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 3),
+ SENSOR_ATTR(in4_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 4),
+ SENSOR_ATTR(in5_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 5),
+ SENSOR_ATTR(in6_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 6),
+ SENSOR_ATTR(in7_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 7),
+ SENSOR_ATTR(in8_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 8),
+ SENSOR_ATTR(in9_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 9),
+ SENSOR_ATTR(in10_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 10),
+};
+static struct sensor_device_attribute in_max_alarm[] = {
+ SENSOR_ATTR(in0_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 0),
+ SENSOR_ATTR(in1_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 1),
+ SENSOR_ATTR(in2_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 2),
+ SENSOR_ATTR(in3_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 3),
+ SENSOR_ATTR(in4_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 4),
+ SENSOR_ATTR(in5_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 5),
+ SENSOR_ATTR(in6_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 6),
+ SENSOR_ATTR(in7_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 7),
+ SENSOR_ATTR(in8_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 8),
+ SENSOR_ATTR(in9_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 9),
+ SENSOR_ATTR(in10_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 10),
+};
+
#define VIN_UNIT_ATTRS(X) \
&in_input[X].dev_attr.attr, \
&in_status[X].dev_attr.attr, \
&in_min[X].dev_attr.attr, \
- &in_max[X].dev_attr.attr
+ &in_max[X].dev_attr.attr, \
+ &in_min_alarm[X].dev_attr.attr, \
+ &in_max_alarm[X].dev_attr.attr
static ssize_t show_vid(struct device *dev, struct device_attribute *attr, char *buf)
{
@@ -658,12 +714,68 @@ static struct sensor_device_attribute therm_crit[] = {
show_therm_crit, set_therm_crit, 2+11),
};
+/* show_therm_min/max_alarm() reads data from the per-channel voltage
+ status register (sec 11.5.12) */
+
+static ssize_t show_therm_min_alarm(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct pc87360_data *data = pc87360_update_device(dev);
+ unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+ return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_therm_max_alarm(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct pc87360_data *data = pc87360_update_device(dev);
+ unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+ return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MAX));
+}
+static ssize_t show_therm_crit_alarm(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct pc87360_data *data = pc87360_update_device(dev);
+ unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+ return sprintf(buf, "%u\n", !!(data->in_status[nr] & TEMP_ALM_CRIT));
+}
+
+static struct sensor_device_attribute therm_min_alarm[] = {
+ SENSOR_ATTR(temp4_min_alarm, S_IRUGO,
+ show_therm_min_alarm, NULL, 0+11),
+ SENSOR_ATTR(temp5_min_alarm, S_IRUGO,
+ show_therm_min_alarm, NULL, 1+11),
+ SENSOR_ATTR(temp6_min_alarm, S_IRUGO,
+ show_therm_min_alarm, NULL, 2+11),
+};
+static struct sensor_device_attribute therm_max_alarm[] = {
+ SENSOR_ATTR(temp4_max_alarm, S_IRUGO,
+ show_therm_max_alarm, NULL, 0+11),
+ SENSOR_ATTR(temp5_max_alarm, S_IRUGO,
+ show_therm_max_alarm, NULL, 1+11),
+ SENSOR_ATTR(temp6_max_alarm, S_IRUGO,
+ show_therm_max_alarm, NULL, 2+11),
+};
+static struct sensor_device_attribute therm_crit_alarm[] = {
+ SENSOR_ATTR(temp4_crit_alarm, S_IRUGO,
+ show_therm_crit_alarm, NULL, 0+11),
+ SENSOR_ATTR(temp5_crit_alarm, S_IRUGO,
+ show_therm_crit_alarm, NULL, 1+11),
+ SENSOR_ATTR(temp6_crit_alarm, S_IRUGO,
+ show_therm_crit_alarm, NULL, 2+11),
+};
+
#define THERM_UNIT_ATTRS(X) \
&therm_input[X].dev_attr.attr, \
&therm_status[X].dev_attr.attr, \
&therm_min[X].dev_attr.attr, \
&therm_max[X].dev_attr.attr, \
- &therm_crit[X].dev_attr.attr
+ &therm_crit[X].dev_attr.attr, \
+ &therm_min_alarm[X].dev_attr.attr, \
+ &therm_max_alarm[X].dev_attr.attr, \
+ &therm_crit_alarm[X].dev_attr.attr
static struct attribute * pc8736x_therm_attr_array[] = {
THERM_UNIT_ATTRS(0),
@@ -790,12 +902,76 @@ static ssize_t show_temp_alarms(struct device *dev, struct device_attribute *att
}
static DEVICE_ATTR(alarms_temp, S_IRUGO, show_temp_alarms, NULL);
+/* show_temp_min/max_alarm() reads data from the per-channel status
+ register (sec 12.3.7), not the temp event status registers (sec
+ 12.3.2) that show_temp_alarm() reads (via data->temp_alarms) */
+
+static ssize_t show_temp_min_alarm(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct pc87360_data *data = pc87360_update_device(dev);
+ unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+ return sprintf(buf, "%u\n", !!(data->temp_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_temp_max_alarm(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct pc87360_data *data = pc87360_update_device(dev);
+ unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+ return sprintf(buf, "%u\n", !!(data->temp_status[nr] & CHAN_ALM_MAX));
+}
+static ssize_t show_temp_crit_alarm(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct pc87360_data *data = pc87360_update_device(dev);
+ unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+ return sprintf(buf, "%u\n", !!(data->temp_status[nr] & TEMP_ALM_CRIT));
+}
+
+static struct sensor_device_attribute temp_min_alarm[] = {
+ SENSOR_ATTR(temp1_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 0),
+ SENSOR_ATTR(temp2_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 1),
+ SENSOR_ATTR(temp3_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 2),
+};
+static struct sensor_device_attribute temp_max_alarm[] = {
+ SENSOR_ATTR(temp1_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 0),
+ SENSOR_ATTR(temp2_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 1),
+ SENSOR_ATTR(temp3_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 2),
+};
+static struct sensor_device_attribute temp_crit_alarm[] = {
+ SENSOR_ATTR(temp1_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 0),
+ SENSOR_ATTR(temp2_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 1),
+ SENSOR_ATTR(temp3_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 2),
+};
+
+#define TEMP_FAULT 0x40 /* open diode */
+static ssize_t show_temp_fault(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct pc87360_data *data = pc87360_update_device(dev);
+ unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+ return sprintf(buf, "%u\n", !!(data->temp_status[nr] & TEMP_FAULT));
+}
+static struct sensor_device_attribute temp_fault[] = {
+ SENSOR_ATTR(temp1_fault, S_IRUGO, show_temp_fault, NULL, 0),
+ SENSOR_ATTR(temp2_fault, S_IRUGO, show_temp_fault, NULL, 1),
+ SENSOR_ATTR(temp3_fault, S_IRUGO, show_temp_fault, NULL, 2),
+};
+
#define TEMP_UNIT_ATTRS(X) \
&temp_input[X].dev_attr.attr, \
&temp_status[X].dev_attr.attr, \
&temp_min[X].dev_attr.attr, \
&temp_max[X].dev_attr.attr, \
- &temp_crit[X].dev_attr.attr
+ &temp_crit[X].dev_attr.attr, \
+ &temp_min_alarm[X].dev_attr.attr, \
+ &temp_max_alarm[X].dev_attr.attr, \
+ &temp_crit_alarm[X].dev_attr.attr, \
+ &temp_fault[X].dev_attr.attr
static struct attribute * pc8736x_temp_attr_array[] = {
TEMP_UNIT_ATTRS(0),
@@ -809,8 +985,8 @@ static const struct attribute_group pc8736x_temp_group = {
.attrs = pc8736x_temp_attr_array,
};
-static ssize_t show_name(struct device *dev, struct device_attribute
- *devattr, char *buf)
+static ssize_t show_name(struct device *dev,
+ struct device_attribute *devattr, char *buf)
{
struct pc87360_data *data = dev_get_drvdata(dev);
return sprintf(buf, "%s\n", data->name);
@@ -955,7 +1131,7 @@ static int __devinit pc87360_probe(struct platform_device *pdev)
mutex_init(&data->update_lock);
platform_set_drvdata(pdev, data);
- for (i = 0; i < 3; i++) {
+ for (i = 0; i < LDNI_MAX; i++) {
if (((data->address[i] = extra_isa[i]))
&& !request_region(extra_isa[i], PC87360_EXTENT,
pc87360_driver.driver.name)) {
@@ -1031,7 +1207,15 @@ static int __devinit pc87360_probe(struct platform_device *pdev)
|| (err = device_create_file(dev,
&temp_crit[i].dev_attr))
|| (err = device_create_file(dev,
- &temp_status[i].dev_attr)))
+ &temp_status[i].dev_attr))
+ || (err = device_create_file(dev,
+ &temp_min_alarm[i].dev_attr))
+ || (err = device_create_file(dev,
+ &temp_max_alarm[i].dev_attr))
+ || (err = device_create_file(dev,
+ &temp_crit_alarm[i].dev_attr))
+ || (err = device_create_file(dev,
+ &temp_fault[i].dev_attr)))
goto ERROR3;
}
if ((err = device_create_file(dev, &dev_attr_alarms_temp)))
@@ -1131,6 +1315,16 @@ static void pc87360_write_value(struct pc87360_data *data, u8 ldi, u8 bank,
mutex_unlock(&(data->lock));
}
+/* (temp & vin) channel conversion status register flags (pdf sec.11.5.12) */
+#define CHAN_CNVRTD 0x80 /* new data ready */
+#define CHAN_ENA 0x01 /* enabled channel (temp or vin) */
+#define CHAN_ALM_ENA 0x10 /* propagate to alarms-reg ?? (chk val!) */
+#define CHAN_READY (CHAN_ENA|CHAN_CNVRTD) /* sample ready mask */
+
+#define TEMP_OTS_OE 0x20 /* OTS Output Enable */
+#define VIN_RW1C_MASK (CHAN_READY|CHAN_ALM_MAX|CHAN_ALM_MIN) /* 0x87 */
+#define TEMP_RW1C_MASK (VIN_RW1C_MASK|TEMP_ALM_CRIT|TEMP_FAULT) /* 0xCF */
+
static void pc87360_init_device(struct platform_device *pdev,
int use_thermistors)
{
@@ -1152,11 +1346,12 @@ static void pc87360_init_device(struct platform_device *pdev,
nr = data->innr < 11 ? data->innr : 11;
for (i = 0; i < nr; i++) {
+ reg = pc87360_read_value(data, LD_IN, i,
+ PC87365_REG_IN_STATUS);
+ dev_dbg(&pdev->dev, "bios in%d status:0x%02x\n", i, reg);
if (init >= init_in[i]) {
/* Forcibly enable voltage channel */
- reg = pc87360_read_value(data, LD_IN, i,
- PC87365_REG_IN_STATUS);
- if (!(reg & 0x01)) {
+ if (!(reg & CHAN_ENA)) {
dev_dbg(&pdev->dev, "Forcibly "
"enabling in%d\n", i);
pc87360_write_value(data, LD_IN, i,
@@ -1168,19 +1363,24 @@ static void pc87360_init_device(struct platform_device *pdev,
/* We can't blindly trust the Super-I/O space configuration bit,
most BIOS won't set it properly */
+ dev_dbg(&pdev->dev, "bios thermistors:%d\n", use_thermistors);
for (i = 11; i < data->innr; i++) {
reg = pc87360_read_value(data, LD_IN, i,
PC87365_REG_TEMP_STATUS);
- use_thermistors = use_thermistors || (reg & 0x01);
+ use_thermistors = use_thermistors || (reg & CHAN_ENA);
+ /* thermistors are temp[4-6], measured on vin[11-14] */
+ dev_dbg(&pdev->dev, "bios temp%d_status:0x%02x\n", i-7, reg);
}
+ dev_dbg(&pdev->dev, "using thermistors:%d\n", use_thermistors);
i = use_thermistors ? 2 : 0;
for (; i < data->tempnr; i++) {
+ reg = pc87360_read_value(data, LD_TEMP, i,
+ PC87365_REG_TEMP_STATUS);
+ dev_dbg(&pdev->dev, "bios temp%d_status:0x%02x\n", i+1, reg);
if (init >= init_temp[i]) {
/* Forcibly enable temperature channel */
- reg = pc87360_read_value(data, LD_TEMP, i,
- PC87365_REG_TEMP_STATUS);
- if (!(reg & 0x01)) {
+ if (!(reg & CHAN_ENA)) {
dev_dbg(&pdev->dev, "Forcibly "
"enabling temp%d\n", i+1);
pc87360_write_value(data, LD_TEMP, i,
@@ -1197,7 +1397,7 @@ static void pc87360_init_device(struct platform_device *pdev,
diodes */
reg = pc87360_read_value(data, LD_TEMP,
(i-11)/2, PC87365_REG_TEMP_STATUS);
- if (reg & 0x01) {
+ if (reg & CHAN_ENA) {
dev_dbg(&pdev->dev, "Skipping "
"temp%d, pin already in use "
"by temp%d\n", i-7, (i-11)/2);
@@ -1207,7 +1407,7 @@ static void pc87360_init_device(struct platform_device *pdev,
/* Forcibly enable thermistor channel */
reg = pc87360_read_value(data, LD_IN, i,
PC87365_REG_IN_STATUS);
- if (!(reg & 0x01)) {
+ if (!(reg & CHAN_ENA)) {
dev_dbg(&pdev->dev, "Forcibly "
"enabling temp%d\n", i-7);
pc87360_write_value(data, LD_IN, i,
@@ -1221,7 +1421,8 @@ static void pc87360_init_device(struct platform_device *pdev,
if (data->innr) {
reg = pc87360_read_value(data, LD_IN, NO_BANK,
PC87365_REG_IN_CONFIG);
- if (reg & 0x01) {
+ dev_dbg(&pdev->dev, "bios vin-cfg:0x%02x\n", reg);
+ if (reg & CHAN_ENA) {
dev_dbg(&pdev->dev, "Forcibly "
"enabling monitoring (VLM)\n");
pc87360_write_value(data, LD_IN, NO_BANK,
@@ -1233,7 +1434,8 @@ static void pc87360_init_device(struct platform_device *pdev,
if (data->tempnr) {
reg = pc87360_read_value(data, LD_TEMP, NO_BANK,
PC87365_REG_TEMP_CONFIG);
- if (reg & 0x01) {
+ dev_dbg(&pdev->dev, "bios temp-cfg:0x%02x\n", reg);
+ if (reg & CHAN_ENA) {
dev_dbg(&pdev->dev, "Forcibly enabling "
"monitoring (TMS)\n");
pc87360_write_value(data, LD_TEMP, NO_BANK,
@@ -1336,11 +1538,11 @@ static struct pc87360_data *pc87360_update_device(struct device *dev)
pc87360_write_value(data, LD_IN, i,
PC87365_REG_IN_STATUS,
data->in_status[i]);
- if ((data->in_status[i] & 0x81) == 0x81) {
+ if ((data->in_status[i] & CHAN_READY) == CHAN_READY) {
data->in[i] = pc87360_read_value(data, LD_IN,
i, PC87365_REG_IN);
}
- if (data->in_status[i] & 0x01) {
+ if (data->in_status[i] & CHAN_ENA) {
data->in_min[i] = pc87360_read_value(data,
LD_IN, i,
PC87365_REG_IN_MIN);
@@ -1373,12 +1575,12 @@ static struct pc87360_data *pc87360_update_device(struct device *dev)
pc87360_write_value(data, LD_TEMP, i,
PC87365_REG_TEMP_STATUS,
data->temp_status[i]);
- if ((data->temp_status[i] & 0x81) == 0x81) {
+ if ((data->temp_status[i] & CHAN_READY) == CHAN_READY) {
data->temp[i] = pc87360_read_value(data,
LD_TEMP, i,
PC87365_REG_TEMP);
}
- if (data->temp_status[i] & 0x01) {
+ if (data->temp_status[i] & CHAN_ENA) {
data->temp_min[i] = pc87360_read_value(data,
LD_TEMP, i,
PC87365_REG_TEMP_MIN);
diff --git a/drivers/i2c/chips/at24.c b/drivers/i2c/chips/at24.c
index 2a4acb26956..d4775528abc 100644
--- a/drivers/i2c/chips/at24.c
+++ b/drivers/i2c/chips/at24.c
@@ -460,7 +460,6 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
*/
at24->bin.attr.name = "eeprom";
at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR;
- at24->bin.attr.owner = THIS_MODULE;
at24->bin.read = at24_bin_read;
at24->bin.size = chip.byte_len;
diff --git a/drivers/i2c/chips/ds1682.c b/drivers/i2c/chips/ds1682.c
index 23be4d42cb0..f3ee4a1abb7 100644
--- a/drivers/i2c/chips/ds1682.c
+++ b/drivers/i2c/chips/ds1682.c
@@ -190,7 +190,6 @@ static struct bin_attribute ds1682_eeprom_attr = {
.attr = {
.name = "eeprom",
.mode = S_IRUGO | S_IWUSR,
- .owner = THIS_MODULE,
},
.size = DS1682_EEPROM_SIZE,
.read = ds1682_eeprom_read,
diff --git a/drivers/i2c/chips/menelaus.c b/drivers/i2c/chips/menelaus.c
index 176126d3a01..4b364bae6b3 100644
--- a/drivers/i2c/chips/menelaus.c
+++ b/drivers/i2c/chips/menelaus.c
@@ -832,52 +832,52 @@ static irqreturn_t menelaus_irq(int irq, void *_menelaus)
static void menelaus_to_time(char *regs, struct rtc_time *t)
{
- t->tm_sec = BCD2BIN(regs[0]);
- t->tm_min = BCD2BIN(regs[1]);
+ t->tm_sec = bcd2bin(regs[0]);
+ t->tm_min = bcd2bin(regs[1]);
if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
- t->tm_hour = BCD2BIN(regs[2] & 0x1f) - 1;
+ t->tm_hour = bcd2bin(regs[2] & 0x1f) - 1;
if (regs[2] & RTC_HR_PM)
t->tm_hour += 12;
} else
- t->tm_hour = BCD2BIN(regs[2] & 0x3f);
- t->tm_mday = BCD2BIN(regs[3]);
- t->tm_mon = BCD2BIN(regs[4]) - 1;
- t->tm_year = BCD2BIN(regs[5]) + 100;
+ t->tm_hour = bcd2bin(regs[2] & 0x3f);
+ t->tm_mday = bcd2bin(regs[3]);
+ t->tm_mon = bcd2bin(regs[4]) - 1;
+ t->tm_year = bcd2bin(regs[5]) + 100;
}
static int time_to_menelaus(struct rtc_time *t, int regnum)
{
int hour, status;
- status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_sec));
+ status = menelaus_write_reg(regnum++, bin2bcd(t->tm_sec));
if (status < 0)
goto fail;
- status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_min));
+ status = menelaus_write_reg(regnum++, bin2bcd(t->tm_min));
if (status < 0)
goto fail;
if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
hour = t->tm_hour + 1;
if (hour > 12)
- hour = RTC_HR_PM | BIN2BCD(hour - 12);
+ hour = RTC_HR_PM | bin2bcd(hour - 12);
else
- hour = BIN2BCD(hour);
+ hour = bin2bcd(hour);
} else
- hour = BIN2BCD(t->tm_hour);
+ hour = bin2bcd(t->tm_hour);
status = menelaus_write_reg(regnum++, hour);
if (status < 0)
goto fail;
- status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_mday));
+ status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mday));
if (status < 0)
goto fail;
- status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_mon + 1));
+ status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mon + 1));
if (status < 0)
goto fail;
- status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_year - 100));
+ status = menelaus_write_reg(regnum++, bin2bcd(t->tm_year - 100));
if (status < 0)
goto fail;
@@ -914,7 +914,7 @@ static int menelaus_read_time(struct device *dev, struct rtc_time *t)
}
menelaus_to_time(regs, t);
- t->tm_wday = BCD2BIN(regs[6]);
+ t->tm_wday = bcd2bin(regs[6]);
return 0;
}
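These hunks replace the old uppercase BCD2BIN/BIN2BCD macros with the bcd2bin()/bin2bcd() functions; the semantics are unchanged. A standalone sketch of what the helpers compute, using the usual packed-BCD definition (written here for illustration, not copied from the kernel's bcd.h):

#include <assert.h>

/* Packed BCD: each nibble holds one decimal digit, e.g. 0x59 == 59. */
static unsigned bcd2bin(unsigned char val)
{
    return (val & 0x0f) + (val >> 4) * 10;
}

static unsigned char bin2bcd(unsigned val)
{
    return (unsigned char)(((val / 10) << 4) | (val % 10));
}

int main(void)
{
    assert(bcd2bin(0x59) == 59);   /* e.g. an RTC seconds register */
    assert(bin2bcd(23) == 0x23);   /* 24-hour "hours" field */
    return 0;
}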
@@ -927,7 +927,7 @@ static int menelaus_set_time(struct device *dev, struct rtc_time *t)
status = time_to_menelaus(t, MENELAUS_RTC_SEC);
if (status < 0)
return status;
- status = menelaus_write_reg(MENELAUS_RTC_WKDAY, BIN2BCD(t->tm_wday));
+ status = menelaus_write_reg(MENELAUS_RTC_WKDAY, bin2bcd(t->tm_wday));
if (status < 0) {
dev_err(&the_menelaus->client->dev, "rtc write reg %02x "
"err %d\n", MENELAUS_RTC_WKDAY, status);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index a78d35aecee..f1e82a92e61 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -122,7 +122,7 @@ struct cm_counter_attribute {
#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
- .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \
+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
.index = _index \
}
diff --git a/drivers/media/dvb/ttpci/av7110.c b/drivers/media/dvb/ttpci/av7110.c
index c7c770c2898..aa1ff524256 100644
--- a/drivers/media/dvb/ttpci/av7110.c
+++ b/drivers/media/dvb/ttpci/av7110.c
@@ -36,7 +36,6 @@
#include <linux/fs.h>
#include <linux/timer.h>
#include <linux/poll.h>
-#include <linux/byteorder/swabb.h>
#include <linux/smp_lock.h>
#include <linux/kernel.h>
@@ -52,6 +51,7 @@
#include <linux/i2c.h>
#include <linux/kthread.h>
#include <asm/unaligned.h>
+#include <asm/byteorder.h>
#include <asm/system.h>
diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h
index fa8be0731a3..a4b1708fafe 100644
--- a/drivers/media/video/cx18/cx18-driver.h
+++ b/drivers/media/video/cx18/cx18-driver.h
@@ -41,6 +41,7 @@
#include <linux/pagemap.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
+#include <asm/byteorder.h>
#include <linux/dvb/video.h>
#include <linux/dvb/audio.h>
diff --git a/drivers/media/video/ivtv/ivtv-driver.h b/drivers/media/video/ivtv/ivtv-driver.h
index bc29436e8a3..3733b2afec5 100644
--- a/drivers/media/video/ivtv/ivtv-driver.h
+++ b/drivers/media/video/ivtv/ivtv-driver.h
@@ -55,6 +55,7 @@
#include <linux/mutex.h>
#include <asm/uaccess.h>
#include <asm/system.h>
+#include <asm/byteorder.h>
#include <linux/dvb/video.h>
#include <linux/dvb/audio.h>
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 6e291bf8237..5263913e0c6 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1044,7 +1044,6 @@ static int mspro_block_read_attributes(struct memstick_dev *card)
s_attr->dev_attr.attr.name = s_attr->name;
s_attr->dev_attr.attr.mode = S_IRUGO;
- s_attr->dev_attr.attr.owner = THIS_MODULE;
s_attr->dev_attr.show = mspro_block_attr_show(s_attr->id);
if (!rc)
diff --git a/drivers/misc/hp-wmi.c b/drivers/misc/hp-wmi.c
index 5dabfb69ee5..4b7c24c519c 100644
--- a/drivers/misc/hp-wmi.c
+++ b/drivers/misc/hp-wmi.c
@@ -82,6 +82,7 @@ static struct key_entry hp_wmi_keymap[] = {
{KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
{KE_KEY, 0x20e6, KEY_PROG1},
{KE_KEY, 0x2142, KEY_MEDIA},
+ {KE_KEY, 0x213b, KEY_INFO},
{KE_KEY, 0x231b, KEY_HELP},
{KE_END, 0}
};
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 14f11f8b9e5..a90d50c2c3e 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -172,6 +172,11 @@ config MTD_CHAR
memory chips, and also use ioctl() to obtain information about
the device, or to erase parts of it.
+config HAVE_MTD_OTP
+ bool
+ help
+ Enable access to OTP regions using MTD_CHAR.
+
config MTD_BLKDEVS
tristate "Common interface to block layer for MTD 'translation layers'"
depends on BLOCK
diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig
index 479d32b57a1..9408099eec4 100644
--- a/drivers/mtd/chips/Kconfig
+++ b/drivers/mtd/chips/Kconfig
@@ -6,6 +6,7 @@ menu "RAM/ROM/Flash chip drivers"
config MTD_CFI
tristate "Detect flash chips by Common Flash Interface (CFI) probe"
select MTD_GEN_PROBE
+ select MTD_CFI_UTIL
help
The Common Flash Interface specification was developed by Intel,
AMD and other flash manufactures that provides a universal method
@@ -154,6 +155,7 @@ config MTD_CFI_I8
config MTD_OTP
bool "Protection Registers aka one-time programmable (OTP) bits"
depends on MTD_CFI_ADV_OPTIONS
+ select HAVE_MTD_OTP
default n
help
This enables support for reading, writing and locking so called
@@ -187,7 +189,7 @@ config MTD_CFI_INTELEXT
StrataFlash and other parts.
config MTD_CFI_AMDSTD
- tristate "Support for AMD/Fujitsu flash chips"
+ tristate "Support for AMD/Fujitsu/Spansion flash chips"
depends on MTD_GEN_PROBE
select MTD_CFI_UTIL
help
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 5f1b472137a..c93a8be5d5f 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -478,6 +478,28 @@ struct mtd_info *cfi_cmdset_0001(struct map_info *map, int primary)
else
cfi->chips[i].erase_time = 2000000;
+ if (cfi->cfiq->WordWriteTimeoutTyp &&
+ cfi->cfiq->WordWriteTimeoutMax)
+ cfi->chips[i].word_write_time_max =
+ 1<<(cfi->cfiq->WordWriteTimeoutTyp +
+ cfi->cfiq->WordWriteTimeoutMax);
+ else
+ cfi->chips[i].word_write_time_max = 50000 * 8;
+
+ if (cfi->cfiq->BufWriteTimeoutTyp &&
+ cfi->cfiq->BufWriteTimeoutMax)
+ cfi->chips[i].buffer_write_time_max =
+ 1<<(cfi->cfiq->BufWriteTimeoutTyp +
+ cfi->cfiq->BufWriteTimeoutMax);
+
+ if (cfi->cfiq->BlockEraseTimeoutTyp &&
+ cfi->cfiq->BlockEraseTimeoutMax)
+ cfi->chips[i].erase_time_max =
+ 1000<<(cfi->cfiq->BlockEraseTimeoutTyp +
+ cfi->cfiq->BlockEraseTimeoutMax);
+ else
+ cfi->chips[i].erase_time_max = 2000000 * 8;
+
cfi->chips[i].ref_point_counter = 0;
init_waitqueue_head(&(cfi->chips[i].wq));
}
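In the CFI query data both timeout fields are stored as base-2 exponents: the *TimeoutTyp fields give log2 of the typical time and the *TimeoutMax fields give log2 of the max/typical multiplier, so the worst case is 1 << (Typ + Max) microseconds (with a 1000<< scaling for erase, which is specified in milliseconds). A quick standalone check of that arithmetic, with made-up field values:

#include <stdio.h>

int main(void)
{
    /* Hypothetical CFI query values: typical word write 2^4 = 16 us,
     * maximum 2^3 = 8 times the typical time. */
    unsigned typ_exp = 4, max_exp = 3;

    unsigned typical_us = 1u << typ_exp;             /* 16 us  */
    unsigned worst_us   = 1u << (typ_exp + max_exp); /* 128 us */

    printf("typ=%u us, max=%u us (= typ * %u)\n",
           typical_us, worst_us, 1u << max_exp);
    return 0;
}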
@@ -703,6 +725,10 @@ static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long
struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
unsigned long timeo = jiffies + HZ;
+ /* Prevent setting state FL_SYNCING for chip in suspended state. */
+ if (mode == FL_SYNCING && chip->oldstate != FL_READY)
+ goto sleep;
+
switch (chip->state) {
case FL_STATUS:
@@ -808,8 +834,9 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
DECLARE_WAITQUEUE(wait, current);
retry:
- if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING
- || mode == FL_OTP_WRITE || mode == FL_SHUTDOWN)) {
+ if (chip->priv &&
+ (mode == FL_WRITING || mode == FL_ERASING || mode == FL_OTP_WRITE
+ || mode == FL_SHUTDOWN) && chip->state != FL_SYNCING) {
/*
* OK. We have possibility for contention on the write/erase
* operations which are global to the real chip and not per
@@ -859,6 +886,14 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
return ret;
}
spin_lock(&shared->lock);
+
+ /* We should not own chip if it is already
+ * in FL_SYNCING state. Put contender and retry. */
+ if (chip->state == FL_SYNCING) {
+ put_chip(map, contender, contender->start);
+ spin_unlock(contender->mutex);
+ goto retry;
+ }
spin_unlock(contender->mutex);
}
@@ -1012,7 +1047,7 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip,
static int __xipram xip_wait_for_operation(
struct map_info *map, struct flchip *chip,
- unsigned long adr, unsigned int chip_op_time )
+ unsigned long adr, unsigned int chip_op_time_max)
{
struct cfi_private *cfi = map->fldrv_priv;
struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
@@ -1021,7 +1056,7 @@ static int __xipram xip_wait_for_operation(
flstate_t oldstate, newstate;
start = xip_currtime();
- usec = chip_op_time * 8;
+ usec = chip_op_time_max;
if (usec == 0)
usec = 500000;
done = 0;
@@ -1131,8 +1166,8 @@ static int __xipram xip_wait_for_operation(
#define XIP_INVAL_CACHED_RANGE(map, from, size) \
INVALIDATE_CACHED_RANGE(map, from, size)
-#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec) \
- xip_wait_for_operation(map, chip, cmd_adr, usec)
+#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec, usec_max) \
+ xip_wait_for_operation(map, chip, cmd_adr, usec_max)
#else
@@ -1144,7 +1179,7 @@ static int __xipram xip_wait_for_operation(
static int inval_cache_and_wait_for_operation(
struct map_info *map, struct flchip *chip,
unsigned long cmd_adr, unsigned long inval_adr, int inval_len,
- unsigned int chip_op_time)
+ unsigned int chip_op_time, unsigned int chip_op_time_max)
{
struct cfi_private *cfi = map->fldrv_priv;
map_word status, status_OK = CMD(0x80);
@@ -1156,8 +1191,7 @@ static int inval_cache_and_wait_for_operation(
INVALIDATE_CACHED_RANGE(map, inval_adr, inval_len);
spin_lock(chip->mutex);
- /* set our timeout to 8 times the expected delay */
- timeo = chip_op_time * 8;
+ timeo = chip_op_time_max;
if (!timeo)
timeo = 500000;
reset_timeo = timeo;
@@ -1217,8 +1251,8 @@ static int inval_cache_and_wait_for_operation(
#endif
-#define WAIT_TIMEOUT(map, chip, adr, udelay) \
- INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay);
+#define WAIT_TIMEOUT(map, chip, adr, udelay, udelay_max) \
+ INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay, udelay_max);
static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t adr, size_t len)
@@ -1452,7 +1486,8 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
adr, map_bankwidth(map),
- chip->word_write_time);
+ chip->word_write_time,
+ chip->word_write_time_max);
if (ret) {
xip_enable(map, chip, adr);
printk(KERN_ERR "%s: word write error (status timeout)\n", map->name);
@@ -1623,7 +1658,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
chip->state = FL_WRITING_TO_BUFFER;
map_write(map, write_cmd, cmd_adr);
- ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0);
+ ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0, 0);
if (ret) {
/* Argh. Not ready for write to buffer */
map_word Xstatus = map_read(map, cmd_adr);
@@ -1640,7 +1675,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
/* Figure out the number of words to write */
word_gap = (-adr & (map_bankwidth(map)-1));
- words = (len - word_gap + map_bankwidth(map) - 1) / map_bankwidth(map);
+ words = DIV_ROUND_UP(len - word_gap, map_bankwidth(map));
if (!word_gap) {
words--;
} else {
@@ -1692,7 +1727,8 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
ret = INVAL_CACHE_AND_WAIT(map, chip, cmd_adr,
initial_adr, initial_len,
- chip->buffer_write_time);
+ chip->buffer_write_time,
+ chip->buffer_write_time_max);
if (ret) {
map_write(map, CMD(0x70), cmd_adr);
chip->state = FL_STATUS;
@@ -1827,7 +1863,8 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
adr, len,
- chip->erase_time);
+ chip->erase_time,
+ chip->erase_time_max);
if (ret) {
map_write(map, CMD(0x70), adr);
chip->state = FL_STATUS;
@@ -2006,7 +2043,7 @@ static int __xipram do_xxlock_oneblock(struct map_info *map, struct flchip *chip
*/
udelay = (!extp || !(extp->FeatureSupport & (1 << 5))) ? 1000000/HZ : 0;
- ret = WAIT_TIMEOUT(map, chip, adr, udelay);
+ ret = WAIT_TIMEOUT(map, chip, adr, udelay, udelay * 100);
if (ret) {
map_write(map, CMD(0x70), adr);
chip->state = FL_STATUS;
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index a972cc6be43..3e6f5d8609e 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -13,6 +13,8 @@
* XIP support hooks by Vitaly Wool (based on code for Intel flash
* by Nicolas Pitre)
*
+ * 25/09/2008 Christopher Moore: TopBottom fixup for many Macronix with CFI V1.0
+ *
* Occasionally maintained by Thayne Harbaugh tharbaugh at lnxi dot com
*
* This code is GPL
@@ -43,6 +45,7 @@
#define MANUFACTURER_AMD 0x0001
#define MANUFACTURER_ATMEL 0x001F
+#define MANUFACTURER_MACRONIX 0x00C2
#define MANUFACTURER_SST 0x00BF
#define SST49LF004B 0x0060
#define SST49LF040B 0x0050
@@ -144,12 +147,44 @@ static void fixup_amd_bootblock(struct mtd_info *mtd, void* param)
if (((major << 8) | minor) < 0x3131) {
/* CFI version 1.0 => don't trust bootloc */
+
+ DEBUG(MTD_DEBUG_LEVEL1,
+ "%s: JEDEC Vendor ID is 0x%02X Device ID is 0x%02X\n",
+ map->name, cfi->mfr, cfi->id);
+
+ /* AFAICS all 29LV400 with a bottom boot block have a device ID
+ * of 0x22BA in 16-bit mode and 0xBA in 8-bit mode.
+ * These were badly detected as they have the 0x80 bit set
+ * so treat them as a special case.
+ */
+ if (((cfi->id == 0xBA) || (cfi->id == 0x22BA)) &&
+
+ /* Macronix added CFI to their 2nd generation
+ * MX29LV400C B/T but AFAICS no other 29LV400 (AMD,
+ * Fujitsu, Spansion, EON, ESI and older Macronix)
+ * has CFI.
+ *
+ * Therefore also check the manufacturer.
+ * This reduces the risk of false detection due to
+ * the 8-bit device ID.
+ */
+ (cfi->mfr == MANUFACTURER_MACRONIX)) {
+ DEBUG(MTD_DEBUG_LEVEL1,
+ "%s: Macronix MX29LV400C with bottom boot block"
+ " detected\n", map->name);
+ extp->TopBottom = 2; /* bottom boot */
+ } else
if (cfi->id & 0x80) {
printk(KERN_WARNING "%s: JEDEC Device ID is 0x%02X. Assuming broken CFI table.\n", map->name, cfi->id);
extp->TopBottom = 3; /* top boot */
} else {
extp->TopBottom = 2; /* bottom boot */
}
+
+ DEBUG(MTD_DEBUG_LEVEL1,
+ "%s: AMD CFI PRI V%c.%c has no boot block field;"
+ " deduced %s from Device ID\n", map->name, major, minor,
+ extp->TopBottom == 2 ? "bottom" : "top");
}
}
#endif
@@ -178,10 +213,18 @@ static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
if (atmel_pri.Features & 0x02)
extp->EraseSuspend = 2;
- if (atmel_pri.BottomBoot)
- extp->TopBottom = 2;
- else
- extp->TopBottom = 3;
+ /* Some chips got it backwards... */
+ if (cfi->id == AT49BV6416) {
+ if (atmel_pri.BottomBoot)
+ extp->TopBottom = 3;
+ else
+ extp->TopBottom = 2;
+ } else {
+ if (atmel_pri.BottomBoot)
+ extp->TopBottom = 2;
+ else
+ extp->TopBottom = 3;
+ }
/* burst write mode not supported */
cfi->cfiq->BufWriteTimeoutTyp = 0;
@@ -243,6 +286,7 @@ static struct cfi_fixup cfi_fixup_table[] = {
{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL },
#ifdef AMD_BOOTLOC_BUG
{ CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL },
+ { MANUFACTURER_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock, NULL },
#endif
{ CFI_MFR_AMD, 0x0050, fixup_use_secsi, NULL, },
{ CFI_MFR_AMD, 0x0053, fixup_use_secsi, NULL, },
diff --git a/drivers/mtd/chips/cfi_probe.c b/drivers/mtd/chips/cfi_probe.c
index c418e92e1d9..e63e6749429 100644
--- a/drivers/mtd/chips/cfi_probe.c
+++ b/drivers/mtd/chips/cfi_probe.c
@@ -44,17 +44,14 @@ do { \
#define xip_enable(base, map, cfi) \
do { \
- cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \
- cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \
+ cfi_qry_mode_off(base, map, cfi); \
xip_allowed(base, map); \
} while (0)
#define xip_disable_qry(base, map, cfi) \
do { \
xip_disable(); \
- cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \
- cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \
- cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); \
+ cfi_qry_mode_on(base, map, cfi); \
} while (0)
#else
@@ -70,32 +67,6 @@ do { \
in: interleave,type,mode
ret: table index, <0 for error
*/
-static int __xipram qry_present(struct map_info *map, __u32 base,
- struct cfi_private *cfi)
-{
- int osf = cfi->interleave * cfi->device_type; // scale factor
- map_word val[3];
- map_word qry[3];
-
- qry[0] = cfi_build_cmd('Q', map, cfi);
- qry[1] = cfi_build_cmd('R', map, cfi);
- qry[2] = cfi_build_cmd('Y', map, cfi);
-
- val[0] = map_read(map, base + osf*0x10);
- val[1] = map_read(map, base + osf*0x11);
- val[2] = map_read(map, base + osf*0x12);
-
- if (!map_word_equal(map, qry[0], val[0]))
- return 0;
-
- if (!map_word_equal(map, qry[1], val[1]))
- return 0;
-
- if (!map_word_equal(map, qry[2], val[2]))
- return 0;
-
- return 1; // "QRY" found
-}
static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
unsigned long *chip_map, struct cfi_private *cfi)
@@ -116,11 +87,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
}
xip_disable();
- cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
- cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
- cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
-
- if (!qry_present(map,base,cfi)) {
+ if (!cfi_qry_mode_on(base, map, cfi)) {
xip_enable(base, map, cfi);
return 0;
}
@@ -141,14 +108,13 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
start = i << cfi->chipshift;
/* This chip should be in read mode if it's one
we've already touched. */
- if (qry_present(map, start, cfi)) {
+ if (cfi_qry_present(map, start, cfi)) {
/* Eep. This chip also had the QRY marker.
* Is it an alias for the new one? */
- cfi_send_gen_cmd(0xF0, 0, start, map, cfi, cfi->device_type, NULL);
- cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL);
+ cfi_qry_mode_off(start, map, cfi);
/* If the QRY marker goes away, it's an alias */
- if (!qry_present(map, start, cfi)) {
+ if (!cfi_qry_present(map, start, cfi)) {
xip_allowed(base, map);
printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n",
map->name, base, start);
@@ -158,10 +124,9 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
* unfortunate. Stick the new chip in read mode
* too and if it's the same, assume it's an alias. */
/* FIXME: Use other modes to do a proper check */
- cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
- cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL);
+ cfi_qry_mode_off(base, map, cfi);
- if (qry_present(map, base, cfi)) {
+ if (cfi_qry_present(map, base, cfi)) {
xip_allowed(base, map);
printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n",
map->name, base, start);
@@ -176,8 +141,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
cfi->numchips++;
/* Put it back into Read Mode */
- cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
- cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+ cfi_qry_mode_off(base, map, cfi);
xip_allowed(base, map);
printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n",
@@ -237,9 +201,7 @@ static int __xipram cfi_chip_setup(struct map_info *map,
cfi_read_query(map, base + 0xf * ofs_factor);
/* Put it back into Read Mode */
- cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
- /* ... even if it's an Intel chip */
- cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+ cfi_qry_mode_off(base, map, cfi);
xip_allowed(base, map);
/* Do any necessary byteswapping */
diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c
index 0ee45701801..34d40e25d31 100644
--- a/drivers/mtd/chips/cfi_util.c
+++ b/drivers/mtd/chips/cfi_util.c
@@ -24,6 +24,66 @@
#include <linux/mtd/cfi.h>
#include <linux/mtd/compatmac.h>
+int __xipram cfi_qry_present(struct map_info *map, __u32 base,
+ struct cfi_private *cfi)
+{
+ int osf = cfi->interleave * cfi->device_type; /* scale factor */
+ map_word val[3];
+ map_word qry[3];
+
+ qry[0] = cfi_build_cmd('Q', map, cfi);
+ qry[1] = cfi_build_cmd('R', map, cfi);
+ qry[2] = cfi_build_cmd('Y', map, cfi);
+
+ val[0] = map_read(map, base + osf*0x10);
+ val[1] = map_read(map, base + osf*0x11);
+ val[2] = map_read(map, base + osf*0x12);
+
+ if (!map_word_equal(map, qry[0], val[0]))
+ return 0;
+
+ if (!map_word_equal(map, qry[1], val[1]))
+ return 0;
+
+ if (!map_word_equal(map, qry[2], val[2]))
+ return 0;
+
+ return 1; /* "QRY" found */
+}
+EXPORT_SYMBOL_GPL(cfi_qry_present);
+
+int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map,
+ struct cfi_private *cfi)
+{
+ cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+ cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
+ if (cfi_qry_present(map, base, cfi))
+ return 1;
+ /* QRY not found; we are probably dealing with some odd CFI chips */
+ /* Some revisions of some old Intel chips? */
+ cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+ cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+ cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
+ if (cfi_qry_present(map, base, cfi))
+ return 1;
+ /* ST M29DW chips */
+ cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+ cfi_send_gen_cmd(0x98, 0x555, base, map, cfi, cfi->device_type, NULL);
+ if (cfi_qry_present(map, base, cfi))
+ return 1;
+ /* QRY not found */
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cfi_qry_mode_on);
+
+void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map,
+ struct cfi_private *cfi)
+{
+ cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+ cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+}
+EXPORT_SYMBOL_GPL(cfi_qry_mode_off);
+
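The three command sequences above cover standard CFI parts, older Intel parts that want an extra 0xFF read-array reset first, and ST M29DW parts that expect the query command at address 0x555. Callers now bracket query-mode accesses with the helper pair; an illustrative fragment (not standalone: map, base and cfi are assumed to be set up as in cfi_probe_chip()):

if (!cfi_qry_mode_on(base, map, cfi)) {
    /* no QRY marker under any known sequence: not a CFI chip */
    return 0;
}

/* ... read the CFI ID string, geometry and timing fields ... */

cfi_qry_mode_off(base, map, cfi);    /* back to read-array mode */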
struct cfi_extquery *
__xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* name)
{
@@ -48,8 +108,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
#endif
/* Switch it into Query Mode */
- cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
-
+ cfi_qry_mode_on(base, map, cfi);
/* Read in the Extended Query Table */
for (i=0; i<size; i++) {
((unsigned char *)extp)[i] =
@@ -57,8 +116,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
}
/* Make sure it returns to read mode */
- cfi_send_gen_cmd(0xf0, 0, base, map, cfi, cfi->device_type, NULL);
- cfi_send_gen_cmd(0xff, 0, base, map, cfi, cfi->device_type, NULL);
+ cfi_qry_mode_off(base, map, cfi);
#ifdef CONFIG_MTD_XIP
(void) map_read(map, base);
diff --git a/drivers/mtd/chips/gen_probe.c b/drivers/mtd/chips/gen_probe.c
index f061885b281..e2dc96441e0 100644
--- a/drivers/mtd/chips/gen_probe.c
+++ b/drivers/mtd/chips/gen_probe.c
@@ -111,7 +111,7 @@ static struct cfi_private *genprobe_ident_chips(struct map_info *map, struct chi
max_chips = 1;
}
- mapsize = sizeof(long) * ( (max_chips + BITS_PER_LONG-1) / BITS_PER_LONG );
+ mapsize = sizeof(long) * DIV_ROUND_UP(max_chips, BITS_PER_LONG);
chip_map = kzalloc(mapsize, GFP_KERNEL);
if (!chip_map) {
printk(KERN_WARNING "%s: kmalloc failed for CFI chip map\n", map->name);
diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c
index 71bc07f149b..50a340388e7 100644
--- a/drivers/mtd/cmdlinepart.c
+++ b/drivers/mtd/cmdlinepart.c
@@ -7,6 +7,7 @@
*
* mtdparts=<mtddef>[;<mtddef]
* <mtddef> := <mtd-id>:<partdef>[,<partdef>]
+ * where <mtd-id> is the name from the "cat /proc/mtd" command
* <partdef> := <size>[@offset][<name>][ro][lk]
* <mtd-id> := unique name used in mapping driver/device (mtd->name)
* <size> := standard linux memsize OR "-" to denote all remaining space
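For example, under the grammar above a device that "cat /proc/mtd" reports as "physmap-flash.0" could be split from the kernel command line like this (an illustration with made-up names and sizes, assuming the parser's parenthesized form for <name>):

mtdparts=physmap-flash.0:256k(u-boot)ro,64k(env),-(rootfs)

The first partition is marked read-only with "ro", and "-" assigns all remaining space to the final partition.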
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 9c613f06623..6fde0a2e356 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -59,6 +59,27 @@ config MTD_DATAFLASH
Sometimes DataFlash chips are packaged inside MMC-format
cards; at this writing, the MMC stack won't handle those.
+config MTD_DATAFLASH_WRITE_VERIFY
+ bool "Verify DataFlash page writes"
+ depends on MTD_DATAFLASH
+ help
+ This adds an extra check when data is written to the flash.
+ It may help if you are verifying chip setup (timings etc) on
+ your board. There is a rare possibility that even though the
+ device thinks the write was successful, a bit could have been
+ flipped accidentally due to device wear or something else.
+
+config MTD_DATAFLASH_OTP
+ bool "DataFlash OTP support (Security Register)"
+ depends on MTD_DATAFLASH
+ select HAVE_MTD_OTP
+ help
+ Newer DataFlash chips (revisions C and D) support 128 bytes of
+ one-time-programmable (OTP) data. The first half may be written
+ (once) with up to 64 bytes of data, such as a serial number or
+ other key product data. The second half is programmed with a
+ unique-to-each-chip bit pattern at the factory.
+
config MTD_M25P80
tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)"
depends on SPI_MASTER && EXPERIMENTAL
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index b35c3333e21..76a76751da3 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -39,6 +39,7 @@
#define OPCODE_PP 0x02 /* Page program (up to 256 bytes) */
#define OPCODE_BE_4K 0x20 /* Erase 4KiB block */
#define OPCODE_BE_32K 0x52 /* Erase 32KiB block */
+#define OPCODE_BE 0xc7 /* Erase whole flash chip */
#define OPCODE_SE 0xd8 /* Sector erase (usually 64KiB) */
#define OPCODE_RDID 0x9f /* Read JEDEC ID */
@@ -161,6 +162,31 @@ static int wait_till_ready(struct m25p *flash)
return 1;
}
+/*
+ * Erase the whole flash memory
+ *
+ * Returns 0 if successful, non-zero otherwise.
+ */
+static int erase_block(struct m25p *flash)
+{
+ DEBUG(MTD_DEBUG_LEVEL3, "%s: %s %dKiB\n",
+ flash->spi->dev.bus_id, __func__,
+ flash->mtd.size / 1024);
+
+ /* Wait until finished previous write command. */
+ if (wait_till_ready(flash))
+ return 1;
+
+ /* Send write enable, then erase commands. */
+ write_enable(flash);
+
+ /* Set up command buffer. */
+ flash->command[0] = OPCODE_BE;
+
+ spi_write(flash->spi, flash->command, 1);
+
+ return 0;
+}
/*
* Erase one sector of flash memory at offset ``offset'' which is any
@@ -229,15 +255,21 @@ static int m25p80_erase(struct mtd_info *mtd, struct erase_info *instr)
*/
/* now erase those sectors */
- while (len) {
- if (erase_sector(flash, addr)) {
- instr->state = MTD_ERASE_FAILED;
- mutex_unlock(&flash->lock);
- return -EIO;
- }
+ if (len == flash->mtd.size) {
+ /* whole-chip erase; don't fall through to the sector loop */
+ if (erase_block(flash)) {
+ instr->state = MTD_ERASE_FAILED;
+ mutex_unlock(&flash->lock);
+ return -EIO;
+ }
+ } else {
+ while (len) {
+ if (erase_sector(flash, addr)) {
+ instr->state = MTD_ERASE_FAILED;
+ mutex_unlock(&flash->lock);
+ return -EIO;
+ }
- addr += mtd->erasesize;
- len -= mtd->erasesize;
+ addr += mtd->erasesize;
+ len -= mtd->erasesize;
+ }
}
mutex_unlock(&flash->lock);
@@ -437,6 +469,7 @@ struct flash_info {
* then a two byte device id.
*/
u32 jedec_id;
+ u16 ext_id;
/* The size listed here is what works with OPCODE_SE, which isn't
* necessarily called a "sector" by the vendor.
@@ -456,72 +489,75 @@ struct flash_info {
static struct flash_info __devinitdata m25p_data [] = {
/* Atmel -- some are (confusingly) marketed as "DataFlash" */
- { "at25fs010", 0x1f6601, 32 * 1024, 4, SECT_4K, },
- { "at25fs040", 0x1f6604, 64 * 1024, 8, SECT_4K, },
+ { "at25fs010", 0x1f6601, 0, 32 * 1024, 4, SECT_4K, },
+ { "at25fs040", 0x1f6604, 0, 64 * 1024, 8, SECT_4K, },
- { "at25df041a", 0x1f4401, 64 * 1024, 8, SECT_4K, },
- { "at25df641", 0x1f4800, 64 * 1024, 128, SECT_4K, },
+ { "at25df041a", 0x1f4401, 0, 64 * 1024, 8, SECT_4K, },
+ { "at25df641", 0x1f4800, 0, 64 * 1024, 128, SECT_4K, },
- { "at26f004", 0x1f0400, 64 * 1024, 8, SECT_4K, },
- { "at26df081a", 0x1f4501, 64 * 1024, 16, SECT_4K, },
- { "at26df161a", 0x1f4601, 64 * 1024, 32, SECT_4K, },
- { "at26df321", 0x1f4701, 64 * 1024, 64, SECT_4K, },
+ { "at26f004", 0x1f0400, 0, 64 * 1024, 8, SECT_4K, },
+ { "at26df081a", 0x1f4501, 0, 64 * 1024, 16, SECT_4K, },
+ { "at26df161a", 0x1f4601, 0, 64 * 1024, 32, SECT_4K, },
+ { "at26df321", 0x1f4701, 0, 64 * 1024, 64, SECT_4K, },
/* Spansion -- single (large) sector size only, at least
* for the chips listed here (without boot sectors).
*/
- { "s25sl004a", 0x010212, 64 * 1024, 8, },
- { "s25sl008a", 0x010213, 64 * 1024, 16, },
- { "s25sl016a", 0x010214, 64 * 1024, 32, },
- { "s25sl032a", 0x010215, 64 * 1024, 64, },
- { "s25sl064a", 0x010216, 64 * 1024, 128, },
+ { "s25sl004a", 0x010212, 0, 64 * 1024, 8, },
+ { "s25sl008a", 0x010213, 0, 64 * 1024, 16, },
+ { "s25sl016a", 0x010214, 0, 64 * 1024, 32, },
+ { "s25sl032a", 0x010215, 0, 64 * 1024, 64, },
+ { "s25sl064a", 0x010216, 0, 64 * 1024, 128, },
+ { "s25sl12800", 0x012018, 0x0300, 256 * 1024, 64, },
+ { "s25sl12801", 0x012018, 0x0301, 64 * 1024, 256, },
/* SST -- large erase sizes are "overlays", "sectors" are 4K */
- { "sst25vf040b", 0xbf258d, 64 * 1024, 8, SECT_4K, },
- { "sst25vf080b", 0xbf258e, 64 * 1024, 16, SECT_4K, },
- { "sst25vf016b", 0xbf2541, 64 * 1024, 32, SECT_4K, },
- { "sst25vf032b", 0xbf254a, 64 * 1024, 64, SECT_4K, },
+ { "sst25vf040b", 0xbf258d, 0, 64 * 1024, 8, SECT_4K, },
+ { "sst25vf080b", 0xbf258e, 0, 64 * 1024, 16, SECT_4K, },
+ { "sst25vf016b", 0xbf2541, 0, 64 * 1024, 32, SECT_4K, },
+ { "sst25vf032b", 0xbf254a, 0, 64 * 1024, 64, SECT_4K, },
/* ST Microelectronics -- newer production may have feature updates */
- { "m25p05", 0x202010, 32 * 1024, 2, },
- { "m25p10", 0x202011, 32 * 1024, 4, },
- { "m25p20", 0x202012, 64 * 1024, 4, },
- { "m25p40", 0x202013, 64 * 1024, 8, },
- { "m25p80", 0, 64 * 1024, 16, },
- { "m25p16", 0x202015, 64 * 1024, 32, },
- { "m25p32", 0x202016, 64 * 1024, 64, },
- { "m25p64", 0x202017, 64 * 1024, 128, },
- { "m25p128", 0x202018, 256 * 1024, 64, },
-
- { "m45pe80", 0x204014, 64 * 1024, 16, },
- { "m45pe16", 0x204015, 64 * 1024, 32, },
-
- { "m25pe80", 0x208014, 64 * 1024, 16, },
- { "m25pe16", 0x208015, 64 * 1024, 32, SECT_4K, },
+ { "m25p05", 0x202010, 0, 32 * 1024, 2, },
+ { "m25p10", 0x202011, 0, 32 * 1024, 4, },
+ { "m25p20", 0x202012, 0, 64 * 1024, 4, },
+ { "m25p40", 0x202013, 0, 64 * 1024, 8, },
+ { "m25p80", 0, 0, 64 * 1024, 16, },
+ { "m25p16", 0x202015, 0, 64 * 1024, 32, },
+ { "m25p32", 0x202016, 0, 64 * 1024, 64, },
+ { "m25p64", 0x202017, 0, 64 * 1024, 128, },
+ { "m25p128", 0x202018, 0, 256 * 1024, 64, },
+
+ { "m45pe80", 0x204014, 0, 64 * 1024, 16, },
+ { "m45pe16", 0x204015, 0, 64 * 1024, 32, },
+
+ { "m25pe80", 0x208014, 0, 64 * 1024, 16, },
+ { "m25pe16", 0x208015, 0, 64 * 1024, 32, SECT_4K, },
/* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
- { "w25x10", 0xef3011, 64 * 1024, 2, SECT_4K, },
- { "w25x20", 0xef3012, 64 * 1024, 4, SECT_4K, },
- { "w25x40", 0xef3013, 64 * 1024, 8, SECT_4K, },
- { "w25x80", 0xef3014, 64 * 1024, 16, SECT_4K, },
- { "w25x16", 0xef3015, 64 * 1024, 32, SECT_4K, },
- { "w25x32", 0xef3016, 64 * 1024, 64, SECT_4K, },
- { "w25x64", 0xef3017, 64 * 1024, 128, SECT_4K, },
+ { "w25x10", 0xef3011, 0, 64 * 1024, 2, SECT_4K, },
+ { "w25x20", 0xef3012, 0, 64 * 1024, 4, SECT_4K, },
+ { "w25x40", 0xef3013, 0, 64 * 1024, 8, SECT_4K, },
+ { "w25x80", 0xef3014, 0, 64 * 1024, 16, SECT_4K, },
+ { "w25x16", 0xef3015, 0, 64 * 1024, 32, SECT_4K, },
+ { "w25x32", 0xef3016, 0, 64 * 1024, 64, SECT_4K, },
+ { "w25x64", 0xef3017, 0, 64 * 1024, 128, SECT_4K, },
};
static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
{
int tmp;
u8 code = OPCODE_RDID;
- u8 id[3];
+ u8 id[5];
u32 jedec;
+ u16 ext_jedec;
struct flash_info *info;
/* JEDEC also defines an optional "extended device information"
* string for after vendor-specific data, after the three bytes
* we use here. Supporting some chips might require using it.
*/
- tmp = spi_write_then_read(spi, &code, 1, id, 3);
+ tmp = spi_write_then_read(spi, &code, 1, id, 5);
if (tmp < 0) {
DEBUG(MTD_DEBUG_LEVEL0, "%s: error %d reading JEDEC ID\n",
spi->dev.bus_id, tmp);
@@ -533,10 +569,14 @@ static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
jedec = jedec << 8;
jedec |= id[2];
+ ext_jedec = id[3] << 8 | id[4];
+
for (tmp = 0, info = m25p_data;
tmp < ARRAY_SIZE(m25p_data);
tmp++, info++) {
- if (info->jedec_id == jedec)
+ if (info->jedec_id == jedec) {
+ if (ext_jedec != 0 && info->ext_id != ext_jedec)
+ continue;
 return info;
+ }
}
dev_err(&spi->dev, "unrecognized JEDEC id %06x\n", jedec);
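With the RDID response now five bytes long, the first three bytes form the classic JEDEC ID and the last two the extended ID that distinguishes the 256 KiB-sector s25sl12800 from the 64 KiB-sector s25sl12801. A standalone sketch of the assembly, using the Spansion bytes implied by the m25p_data table above:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* RDID response for an s25sl12801 (per the m25p_data table). */
    uint8_t id[5] = { 0x01, 0x20, 0x18, 0x03, 0x01 };

    uint32_t jedec = ((uint32_t)id[0] << 16) | (id[1] << 8) | id[2];
    uint16_t ext_jedec = (uint16_t)((id[3] << 8) | id[4]);

    /* prints jedec=0x012018 ext=0x0301 -> matches s25sl12801 */
    printf("jedec=0x%06x ext=0x%04x\n",
           (unsigned)jedec, (unsigned)ext_jedec);
    return 0;
}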
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 8bd0dea6885..6dd9aff8bb2 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -30,12 +30,10 @@
* doesn't (yet) use these for any kind of i/o overlap or prefetching.
*
* Sometimes DataFlash is packaged in MMC-format cards, although the
- * MMC stack can't use SPI (yet), or distinguish between MMC and DataFlash
+ * MMC stack can't (yet?) distinguish between MMC and DataFlash
* protocols during enumeration.
*/
-#define CONFIG_DATAFLASH_WRITE_VERIFY
-
/* reads can bypass the buffers */
#define OP_READ_CONTINUOUS 0xE8
#define OP_READ_PAGE 0xD2
@@ -80,7 +78,8 @@
*/
#define OP_READ_ID 0x9F
#define OP_READ_SECURITY 0x77
-#define OP_WRITE_SECURITY 0x9A /* OTP bits */
+#define OP_WRITE_SECURITY_REVC 0x9A
+#define OP_WRITE_SECURITY 0x9B /* revision D */
struct dataflash {
@@ -402,7 +401,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
(void) dataflash_waitready(priv->spi);
-#ifdef CONFIG_DATAFLASH_WRITE_VERIFY
+#ifdef CONFIG_MTD_DATAFLASH_WRITE_VERIFY
/* (3) Compare to Buffer1 */
addr = pageaddr << priv->page_offset;
@@ -431,7 +430,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
} else
status = 0;
-#endif /* CONFIG_DATAFLASH_WRITE_VERIFY */
+#endif /* CONFIG_MTD_DATAFLASH_WRITE_VERIFY */
remaining = remaining - writelen;
pageaddr++;
@@ -451,16 +450,192 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
/* ......................................................................... */
+#ifdef CONFIG_MTD_DATAFLASH_OTP
+
+static int dataflash_get_otp_info(struct mtd_info *mtd,
+ struct otp_info *info, size_t len)
+{
+ /* Report both blocks as identical: bytes 0..63, locked.
+ * Unless the user block changed from all-ones, we can't
+ * tell whether it's still writable; so we assume it isn't.
+ */
+ info->start = 0;
+ info->length = 64;
+ info->locked = 1;
+ return sizeof(*info);
+}
+
+static ssize_t otp_read(struct spi_device *spi, unsigned base,
+ uint8_t *buf, loff_t off, size_t len)
+{
+ struct spi_message m;
+ size_t l;
+ uint8_t *scratch;
+ struct spi_transfer t;
+ int status;
+
+ if (off > 64)
+ return -EINVAL;
+
+ if ((off + len) > 64)
+ len = 64 - off;
+ if (len == 0)
+ return len;
+
+ spi_message_init(&m);
+
+ l = 4 + base + off + len;
+ scratch = kzalloc(l, GFP_KERNEL);
+ if (!scratch)
+ return -ENOMEM;
+
+ /* OUT: OP_READ_SECURITY, 3 don't-care bytes, zeroes
+ * IN: ignore 4 bytes, data bytes 0..N (max 127)
+ */
+ scratch[0] = OP_READ_SECURITY;
+
+ memset(&t, 0, sizeof t);
+ t.tx_buf = scratch;
+ t.rx_buf = scratch;
+ t.len = l;
+ spi_message_add_tail(&t, &m);
+
+ dataflash_waitready(spi);
+
+ status = spi_sync(spi, &m);
+ if (status >= 0) {
+ memcpy(buf, scratch + 4 + base + off, len);
+ status = len;
+ }
+
+ kfree(scratch);
+ return status;
+}
+
+static int dataflash_read_fact_otp(struct mtd_info *mtd,
+ loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+ struct dataflash *priv = (struct dataflash *)mtd->priv;
+ int status;
+
+ /* 64 bytes, from 0..63 ... start at 64 on-chip */
+ mutex_lock(&priv->lock);
+ status = otp_read(priv->spi, 64, buf, from, len);
+ mutex_unlock(&priv->lock);
+
+ if (status < 0)
+ return status;
+ *retlen = status;
+ return 0;
+}
+
+static int dataflash_read_user_otp(struct mtd_info *mtd,
+ loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+ struct dataflash *priv = (struct dataflash *)mtd->priv;
+ int status;
+
+ /* 64 bytes, from 0..63 ... start at 0 on-chip */
+ mutex_lock(&priv->lock);
+ status = otp_read(priv->spi, 0, buf, from, len);
+ mutex_unlock(&priv->lock);
+
+ if (status < 0)
+ return status;
+ *retlen = status;
+ return 0;
+}
+
+static int dataflash_write_user_otp(struct mtd_info *mtd,
+ loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+ struct spi_message m;
+ const size_t l = 4 + 64;
+ uint8_t *scratch;
+ struct spi_transfer t;
+ struct dataflash *priv = (struct dataflash *)mtd->priv;
+ int status;
+
+ if (len > 64)
+ return -EINVAL;
+
+ /* Strictly speaking, we *could* truncate the write ... but
+ * let's not do that for the only write that's ever possible.
+ */
+ if ((from + len) > 64)
+ return -EINVAL;
+
+ /* OUT: OP_WRITE_SECURITY, 3 zeroes, 64 data-or-zero bytes
+ * IN: ignore all
+ */
+ scratch = kzalloc(l, GFP_KERNEL);
+ if (!scratch)
+ return -ENOMEM;
+ scratch[0] = OP_WRITE_SECURITY;
+ memcpy(scratch + 4 + from, buf, len);
+
+ spi_message_init(&m);
+
+ memset(&t, 0, sizeof t);
+ t.tx_buf = scratch;
+ t.len = l;
+ spi_message_add_tail(&t, &m);
+
+ /* Write the OTP bits, if they've not yet been written.
+ * This modifies SRAM buffer1.
+ */
+ mutex_lock(&priv->lock);
+ dataflash_waitready(priv->spi);
+ status = spi_sync(priv->spi, &m);
+ mutex_unlock(&priv->lock);
+
+ kfree(scratch);
+
+ if (status >= 0) {
+ status = 0;
+ *retlen = len;
+ }
+ return status;
+}
+
+static char *otp_setup(struct mtd_info *device, char revision)
+{
+ device->get_fact_prot_info = dataflash_get_otp_info;
+ device->read_fact_prot_reg = dataflash_read_fact_otp;
+ device->get_user_prot_info = dataflash_get_otp_info;
+ device->read_user_prot_reg = dataflash_read_user_otp;
+
+ /* rev c parts (at45db321c and at45db1281 only!) use a
+ * different write procedure; not (yet?) implemented.
+ */
+ if (revision > 'c')
+ device->write_user_prot_reg = dataflash_write_user_otp;
+
+ return ", OTP";
+}
+
+#else
+
+static char *otp_setup(struct mtd_info *device, char revision)
+{
+ return " (OTP)";
+}
+
+#endif
+
+/* ......................................................................... */
+
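Per the Kconfig text above, the security register is 128 bytes: a user half (bytes 0..63, writable exactly once) and a factory half (on-chip bytes 64..127) programmed with a unique pattern. Once otp_setup() has wired the hooks, the factory pattern can be read through the generic MTD OTP interface; an illustrative fragment (not standalone, no error handling, mtd assumed to be the registered DataFlash device):

size_t retlen = 0;
u_char unique[64];

/* Factory half: dataflash_read_fact_otp() maps this to the
 * on-chip bytes 64..127 via otp_read(spi, 64, ...). */
if (mtd->read_fact_prot_reg(mtd, 0, sizeof(unique), &retlen, unique) == 0)
    pr_info("dataflash: read %zu factory OTP bytes\n", retlen);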
/*
* Register DataFlash device with MTD subsystem.
*/
static int __devinit
-add_dataflash(struct spi_device *spi, char *name,
- int nr_pages, int pagesize, int pageoffset)
+add_dataflash_otp(struct spi_device *spi, char *name,
+ int nr_pages, int pagesize, int pageoffset, char revision)
{
struct dataflash *priv;
struct mtd_info *device;
struct flash_platform_data *pdata = spi->dev.platform_data;
+ char *otp_tag = "";
priv = kzalloc(sizeof *priv, GFP_KERNEL);
if (!priv)
@@ -489,8 +664,12 @@ add_dataflash(struct spi_device *spi, char *name,
device->write = dataflash_write;
device->priv = priv;
- dev_info(&spi->dev, "%s (%d KBytes) pagesize %d bytes\n",
- name, DIV_ROUND_UP(device->size, 1024), pagesize);
+ if (revision >= 'c')
+ otp_tag = otp_setup(device, revision);
+
+ dev_info(&spi->dev, "%s (%d KBytes) pagesize %d bytes%s\n",
+ name, DIV_ROUND_UP(device->size, 1024),
+ pagesize, otp_tag);
dev_set_drvdata(&spi->dev, priv);
if (mtd_has_partitions()) {
@@ -519,6 +698,14 @@ add_dataflash(struct spi_device *spi, char *name,
return add_mtd_device(device) == 1 ? -ENODEV : 0;
}
+static inline int __devinit
+add_dataflash(struct spi_device *spi, char *name,
+ int nr_pages, int pagesize, int pageoffset)
+{
+ return add_dataflash_otp(spi, name, nr_pages, pagesize,
+ pageoffset, 0);
+}
+
struct flash_info {
char *name;
@@ -664,13 +851,16 @@ static int __devinit dataflash_probe(struct spi_device *spi)
* Try to detect dataflash by JEDEC ID.
* If it succeeds we know we have either a C or D part.
* D will support power of 2 pagesize option.
+ * Both support the security register, though with different
+ * write procedures.
*/
info = jedec_probe(spi);
if (IS_ERR(info))
return PTR_ERR(info);
if (info != NULL)
- return add_dataflash(spi, info->name, info->nr_pages,
- info->pagesize, info->pageoffset);
+ return add_dataflash_otp(spi, info->name, info->nr_pages,
+ info->pagesize, info->pageoffset,
+ (info->flags & SUP_POW2PS) ? 'd' : 'c');
/*
* Older chips support only legacy commands, identifying
diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index c4f9d3378b2..50ce13887f6 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -388,6 +388,10 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
if (thisEUN == targetEUN)
break;
+ /* Unlink the last block from the chain. */
+ inftl->PUtable[prevEUN] = BLOCK_NIL;
+
+ /* Now try to erase it. */
if (INFTL_formatblock(inftl, thisEUN) < 0) {
/*
* Could not erase : mark block as reserved.
@@ -396,7 +400,6 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
} else {
/* Correctly erased : mark it as free */
inftl->PUtable[thisEUN] = BLOCK_FREE;
- inftl->PUtable[prevEUN] = BLOCK_NIL;
inftl->numfreeEUNs++;
}
}
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index df8e00bba07..5ea16936216 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -332,30 +332,6 @@ config MTD_CFI_FLAGADM
Mapping for the Flaga digital module. If you don't have one, ignore
this setting.
-config MTD_WALNUT
- tristate "Flash device mapped on IBM 405GP Walnut"
- depends on MTD_JEDECPROBE && WALNUT && !PPC_MERGE
- help
- This enables access routines for the flash chips on the IBM 405GP
- Walnut board. If you have one of these boards and would like to
- use the flash chips on it, say 'Y'.
-
-config MTD_EBONY
- tristate "Flash devices mapped on IBM 440GP Ebony"
- depends on MTD_JEDECPROBE && EBONY && !PPC_MERGE
- help
- This enables access routines for the flash chips on the IBM 440GP
- Ebony board. If you have one of these boards and would like to
- use the flash chips on it, say 'Y'.
-
-config MTD_OCOTEA
- tristate "Flash devices mapped on IBM 440GX Ocotea"
- depends on MTD_CFI && OCOTEA && !PPC_MERGE
- help
- This enables access routines for the flash chips on the IBM 440GX
- Ocotea board. If you have one of these boards and would like to
- use the flash chips on it, say 'Y'.
-
config MTD_REDWOOD
tristate "CFI Flash devices mapped on IBM Redwood"
depends on MTD_CFI && ( REDWOOD_4 || REDWOOD_5 || REDWOOD_6 )
@@ -458,13 +434,6 @@ config MTD_CEIVA
PhotoMax Digital Picture Frame.
If you have such a device, say 'Y'.
-config MTD_NOR_TOTO
- tristate "NOR Flash device on TOTO board"
- depends on ARCH_OMAP && OMAP_TOTO
- help
- This enables access to the NOR flash on the Texas Instruments
- TOTO board.
-
config MTD_H720X
tristate "Hynix evaluation board mappings"
depends on MTD_CFI && ( ARCH_H7201 || ARCH_H7202 )
@@ -522,7 +491,7 @@ config MTD_BFIN_ASYNC
config MTD_UCLINUX
tristate "Generic uClinux RAM/ROM filesystem support"
- depends on MTD_PARTITIONS && !MMU
+ depends on MTD_PARTITIONS && MTD_RAM && !MMU
help
Map driver to support image based filesystems for uClinux.
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index 6cda6df973e..6d9ba35caf1 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -50,12 +50,8 @@ obj-$(CONFIG_MTD_REDWOOD) += redwood.o
obj-$(CONFIG_MTD_UCLINUX) += uclinux.o
obj-$(CONFIG_MTD_NETtel) += nettel.o
obj-$(CONFIG_MTD_SCB2_FLASH) += scb2_flash.o
-obj-$(CONFIG_MTD_EBONY) += ebony.o
-obj-$(CONFIG_MTD_OCOTEA) += ocotea.o
-obj-$(CONFIG_MTD_WALNUT) += walnut.o
obj-$(CONFIG_MTD_H720X) += h720x-flash.o
obj-$(CONFIG_MTD_SBC8240) += sbc8240.o
-obj-$(CONFIG_MTD_NOR_TOTO) += omap-toto-flash.o
obj-$(CONFIG_MTD_IXP4XX) += ixp4xx.o
obj-$(CONFIG_MTD_IXP2000) += ixp2000.o
obj-$(CONFIG_MTD_WRSBC8260) += wr_sbc82xx_flash.o
diff --git a/drivers/mtd/maps/ebony.c b/drivers/mtd/maps/ebony.c
deleted file mode 100644
index d92b7c70d3e..00000000000
--- a/drivers/mtd/maps/ebony.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Mapping for Ebony user flash
- *
- * Matt Porter <mporter@kernel.crashing.org>
- *
- * Copyright 2002-2004 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm44x.h>
-#include <platforms/4xx/ebony.h>
-
-static struct mtd_info *flash;
-
-static struct map_info ebony_small_map = {
- .name = "Ebony small flash",
- .size = EBONY_SMALL_FLASH_SIZE,
- .bankwidth = 1,
-};
-
-static struct map_info ebony_large_map = {
- .name = "Ebony large flash",
- .size = EBONY_LARGE_FLASH_SIZE,
- .bankwidth = 1,
-};
-
-static struct mtd_partition ebony_small_partitions[] = {
- {
- .name = "OpenBIOS",
- .offset = 0x0,
- .size = 0x80000,
- }
-};
-
-static struct mtd_partition ebony_large_partitions[] = {
- {
- .name = "fs",
- .offset = 0,
- .size = 0x380000,
- },
- {
- .name = "firmware",
- .offset = 0x380000,
- .size = 0x80000,
- }
-};
-
-int __init init_ebony(void)
-{
- u8 fpga0_reg;
- u8 __iomem *fpga0_adr;
- unsigned long long small_flash_base, large_flash_base;
-
- fpga0_adr = ioremap64(EBONY_FPGA_ADDR, 16);
- if (!fpga0_adr)
- return -ENOMEM;
-
- fpga0_reg = readb(fpga0_adr);
- iounmap(fpga0_adr);
-
- if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
- !EBONY_FLASH_SEL(fpga0_reg))
- small_flash_base = EBONY_SMALL_FLASH_HIGH2;
- else if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
- EBONY_FLASH_SEL(fpga0_reg))
- small_flash_base = EBONY_SMALL_FLASH_HIGH1;
- else if (!EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
- !EBONY_FLASH_SEL(fpga0_reg))
- small_flash_base = EBONY_SMALL_FLASH_LOW2;
- else
- small_flash_base = EBONY_SMALL_FLASH_LOW1;
-
- if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
- !EBONY_ONBRD_FLASH_EN(fpga0_reg))
- large_flash_base = EBONY_LARGE_FLASH_LOW;
- else
- large_flash_base = EBONY_LARGE_FLASH_HIGH;
-
- ebony_small_map.phys = small_flash_base;
- ebony_small_map.virt = ioremap64(small_flash_base,
- ebony_small_map.size);
-
- if (!ebony_small_map.virt) {
- printk("Failed to ioremap flash\n");
- return -EIO;
- }
-
- simple_map_init(&ebony_small_map);
-
- flash = do_map_probe("jedec_probe", &ebony_small_map);
- if (flash) {
- flash->owner = THIS_MODULE;
- add_mtd_partitions(flash, ebony_small_partitions,
- ARRAY_SIZE(ebony_small_partitions));
- } else {
- printk("map probe failed for flash\n");
- iounmap(ebony_small_map.virt);
- return -ENXIO;
- }
-
- ebony_large_map.phys = large_flash_base;
- ebony_large_map.virt = ioremap64(large_flash_base,
- ebony_large_map.size);
-
- if (!ebony_large_map.virt) {
- printk("Failed to ioremap flash\n");
- iounmap(ebony_small_map.virt);
- return -EIO;
- }
-
- simple_map_init(&ebony_large_map);
-
- flash = do_map_probe("jedec_probe", &ebony_large_map);
- if (flash) {
- flash->owner = THIS_MODULE;
- add_mtd_partitions(flash, ebony_large_partitions,
- ARRAY_SIZE(ebony_large_partitions));
- } else {
- printk("map probe failed for flash\n");
- iounmap(ebony_small_map.virt);
- iounmap(ebony_large_map.virt);
- return -ENXIO;
- }
-
- return 0;
-}
-
-static void __exit cleanup_ebony(void)
-{
- if (flash) {
- del_mtd_partitions(flash);
- map_destroy(flash);
- }
-
- if (ebony_small_map.virt) {
- iounmap(ebony_small_map.virt);
- ebony_small_map.virt = NULL;
- }
-
- if (ebony_large_map.virt) {
- iounmap(ebony_large_map.virt);
- ebony_large_map.virt = NULL;
- }
-}
-
-module_init(init_ebony);
-module_exit(cleanup_ebony);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 440GP Ebony boards");
diff --git a/drivers/mtd/maps/ocotea.c b/drivers/mtd/maps/ocotea.c
deleted file mode 100644
index 5522eac8c98..00000000000
--- a/drivers/mtd/maps/ocotea.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Mapping for Ocotea user flash
- *
- * Matt Porter <mporter@kernel.crashing.org>
- *
- * Copyright 2002-2004 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm44x.h>
-#include <platforms/4xx/ocotea.h>
-
-static struct mtd_info *flash;
-
-static struct map_info ocotea_small_map = {
- .name = "Ocotea small flash",
- .size = OCOTEA_SMALL_FLASH_SIZE,
- .buswidth = 1,
-};
-
-static struct map_info ocotea_large_map = {
- .name = "Ocotea large flash",
- .size = OCOTEA_LARGE_FLASH_SIZE,
- .buswidth = 1,
-};
-
-static struct mtd_partition ocotea_small_partitions[] = {
- {
- .name = "pibs",
- .offset = 0x0,
- .size = 0x100000,
- }
-};
-
-static struct mtd_partition ocotea_large_partitions[] = {
- {
- .name = "fs",
- .offset = 0,
- .size = 0x300000,
- },
- {
- .name = "firmware",
- .offset = 0x300000,
- .size = 0x100000,
- }
-};
-
-int __init init_ocotea(void)
-{
- u8 fpga0_reg;
- u8 *fpga0_adr;
- unsigned long long small_flash_base, large_flash_base;
-
- fpga0_adr = ioremap64(OCOTEA_FPGA_ADDR, 16);
- if (!fpga0_adr)
- return -ENOMEM;
-
- fpga0_reg = readb((unsigned long)fpga0_adr);
- iounmap(fpga0_adr);
-
- if (OCOTEA_BOOT_LARGE_FLASH(fpga0_reg)) {
- small_flash_base = OCOTEA_SMALL_FLASH_HIGH;
- large_flash_base = OCOTEA_LARGE_FLASH_LOW;
- }
- else {
- small_flash_base = OCOTEA_SMALL_FLASH_LOW;
- large_flash_base = OCOTEA_LARGE_FLASH_HIGH;
- }
-
- ocotea_small_map.phys = small_flash_base;
- ocotea_small_map.virt = ioremap64(small_flash_base,
- ocotea_small_map.size);
-
- if (!ocotea_small_map.virt) {
- printk("Failed to ioremap flash\n");
- return -EIO;
- }
-
- simple_map_init(&ocotea_small_map);
-
- flash = do_map_probe("map_rom", &ocotea_small_map);
- if (flash) {
- flash->owner = THIS_MODULE;
- add_mtd_partitions(flash, ocotea_small_partitions,
- ARRAY_SIZE(ocotea_small_partitions));
- } else {
- printk("map probe failed for flash\n");
- iounmap(ocotea_small_map.virt);
- return -ENXIO;
- }
-
- ocotea_large_map.phys = large_flash_base;
- ocotea_large_map.virt = ioremap64(large_flash_base,
- ocotea_large_map.size);
-
- if (!ocotea_large_map.virt) {
- printk("Failed to ioremap flash\n");
- iounmap(ocotea_small_map.virt);
- return -EIO;
- }
-
- simple_map_init(&ocotea_large_map);
-
- flash = do_map_probe("cfi_probe", &ocotea_large_map);
- if (flash) {
- flash->owner = THIS_MODULE;
- add_mtd_partitions(flash, ocotea_large_partitions,
- ARRAY_SIZE(ocotea_large_partitions));
- } else {
- printk("map probe failed for flash\n");
- iounmap(ocotea_small_map.virt);
- iounmap(ocotea_large_map.virt);
- return -ENXIO;
- }
-
- return 0;
-}
-
-static void __exit cleanup_ocotea(void)
-{
- if (flash) {
- del_mtd_partitions(flash);
- map_destroy(flash);
- }
-
- if (ocotea_small_map.virt) {
- iounmap((void *)ocotea_small_map.virt);
- ocotea_small_map.virt = 0;
- }
-
- if (ocotea_large_map.virt) {
- iounmap((void *)ocotea_large_map.virt);
- ocotea_large_map.virt = 0;
- }
-}
-
-module_init(init_ocotea);
-module_exit(cleanup_ocotea);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 440GX Ocotea boards");
diff --git a/drivers/mtd/maps/omap-toto-flash.c b/drivers/mtd/maps/omap-toto-flash.c
deleted file mode 100644
index 0a60ebbc217..00000000000
--- a/drivers/mtd/maps/omap-toto-flash.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * NOR Flash memory access on TI Toto board
- *
- * jzhang@ti.com (C) 2003 Texas Instruments.
- *
- * (C) 2002 MontVista Software, Inc.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-
-#include <asm/hardware.h>
-#include <asm/io.h>
-
-
-#ifndef CONFIG_ARCH_OMAP
-#error This is for OMAP architecture only
-#endif
-
-//these lines need be moved to a hardware header file
-#define OMAP_TOTO_FLASH_BASE 0xd8000000
-#define OMAP_TOTO_FLASH_SIZE 0x80000
-
-static struct map_info omap_toto_map_flash = {
- .name = "OMAP Toto flash",
- .bankwidth = 2,
- .virt = (void __iomem *)OMAP_TOTO_FLASH_BASE,
-};
-
-
-static struct mtd_partition toto_flash_partitions[] = {
- {
- .name = "BootLoader",
- .size = 0x00040000, /* hopefully u-boot will stay 128k + 128*/
- .offset = 0,
- .mask_flags = MTD_WRITEABLE, /* force read-only */
- }, {
- .name = "ReservedSpace",
- .size = 0x00030000,
- .offset = MTDPART_OFS_APPEND,
- //mask_flags: MTD_WRITEABLE, /* force read-only */
- }, {
- .name = "EnvArea", /* bottom 64KiB for env vars */
- .size = MTDPART_SIZ_FULL,
- .offset = MTDPART_OFS_APPEND,
- }
-};
-
-static struct mtd_partition *parsed_parts;
-
-static struct mtd_info *flash_mtd;
-
-static int __init init_flash (void)
-{
-
- struct mtd_partition *parts;
- int nb_parts = 0;
- int parsed_nr_parts = 0;
- const char *part_type;
-
- /*
- * Static partition definition selection
- */
- part_type = "static";
-
- parts = toto_flash_partitions;
- nb_parts = ARRAY_SIZE(toto_flash_partitions);
- omap_toto_map_flash.size = OMAP_TOTO_FLASH_SIZE;
- omap_toto_map_flash.phys = virt_to_phys(OMAP_TOTO_FLASH_BASE);
-
- simple_map_init(&omap_toto_map_flash);
- /*
- * Now let's probe for the actual flash. Do it here since
- * specific machine settings might have been set above.
- */
- printk(KERN_NOTICE "OMAP toto flash: probing %d-bit flash bus\n",
- omap_toto_map_flash.bankwidth*8);
- flash_mtd = do_map_probe("jedec_probe", &omap_toto_map_flash);
- if (!flash_mtd)
- return -ENXIO;
-
- if (parsed_nr_parts > 0) {
- parts = parsed_parts;
- nb_parts = parsed_nr_parts;
- }
-
- if (nb_parts == 0) {
- printk(KERN_NOTICE "OMAP toto flash: no partition info available,"
- "registering whole flash at once\n");
- if (add_mtd_device(flash_mtd)){
- return -ENXIO;
- }
- } else {
- printk(KERN_NOTICE "Using %s partition definition\n",
- part_type);
- return add_mtd_partitions(flash_mtd, parts, nb_parts);
- }
- return 0;
-}
-
-int __init omap_toto_mtd_init(void)
-{
- int status;
-
- if (status = init_flash()) {
- printk(KERN_ERR "OMAP Toto Flash: unable to init map for toto flash\n");
- }
- return status;
-}
-
-static void __exit omap_toto_mtd_cleanup(void)
-{
- if (flash_mtd) {
- del_mtd_partitions(flash_mtd);
- map_destroy(flash_mtd);
- kfree(parsed_parts);
- }
-}
-
-module_init(omap_toto_mtd_init);
-module_exit(omap_toto_mtd_cleanup);
-
-MODULE_AUTHOR("Jian Zhang");
-MODULE_DESCRIPTION("OMAP Toto board map driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/maps/pci.c b/drivers/mtd/maps/pci.c
index 5c6a25c9038..48f4cf5cb9d 100644
--- a/drivers/mtd/maps/pci.c
+++ b/drivers/mtd/maps/pci.c
@@ -203,15 +203,8 @@ intel_dc21285_init(struct pci_dev *dev, struct map_pci_info *map)
* not enabled, should we be allocating a new resource for it
* or simply enabling it?
*/
- if (!(pci_resource_flags(dev, PCI_ROM_RESOURCE) &
- IORESOURCE_ROM_ENABLE)) {
- u32 val;
- pci_resource_flags(dev, PCI_ROM_RESOURCE) |= IORESOURCE_ROM_ENABLE;
- pci_read_config_dword(dev, PCI_ROM_ADDRESS, &val);
- val |= PCI_ROM_ADDRESS_ENABLE;
- pci_write_config_dword(dev, PCI_ROM_ADDRESS, val);
- printk("%s: enabling expansion ROM\n", pci_name(dev));
- }
+ pci_enable_rom(dev);
+ printk("%s: enabling expansion ROM\n", pci_name(dev));
}
if (!len || !base)
@@ -232,18 +225,13 @@ intel_dc21285_init(struct pci_dev *dev, struct map_pci_info *map)
static void
intel_dc21285_exit(struct pci_dev *dev, struct map_pci_info *map)
{
- u32 val;
-
if (map->base)
iounmap(map->base);
/*
* We need to undo the PCI BAR2/PCI ROM BAR address alteration.
*/
- pci_resource_flags(dev, PCI_ROM_RESOURCE) &= ~IORESOURCE_ROM_ENABLE;
- pci_read_config_dword(dev, PCI_ROM_ADDRESS, &val);
- val &= ~PCI_ROM_ADDRESS_ENABLE;
- pci_write_config_dword(dev, PCI_ROM_ADDRESS, val);
+ pci_disable_rom(dev);
}
static unsigned long
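
The hunks above replace the open-coded ROM BAR register twiddling with the PCI core helpers pci_enable_rom() and pci_disable_rom(). A minimal sketch of the resulting pattern, assuming a hypothetical driver that wants temporary access to a device's expansion ROM (only the two helpers are taken from this patch; the function and its surrounding logic are illustrative):

#include <linux/pci.h>
#include <linux/io.h>

/* Sketch: enable the expansion ROM decode, map it, read the 0x55 0xAA
 * signature word, then undo the BAR alteration again. */
static int rom_signature(struct pci_dev *dev)
{
	void __iomem *rom;
	u16 sig;
	int ret;

	ret = pci_enable_rom(dev);	/* sets IORESOURCE_ROM_ENABLE and
					 * PCI_ROM_ADDRESS_ENABLE for us */
	if (ret)
		return ret;

	rom = ioremap(pci_resource_start(dev, PCI_ROM_RESOURCE),
		      pci_resource_len(dev, PCI_ROM_RESOURCE));
	if (!rom) {
		pci_disable_rom(dev);
		return -ENOMEM;
	}

	sig = readw(rom);		/* expansion ROMs begin 0x55 0xAA */
	printk(KERN_INFO "%s: ROM signature %#x\n", pci_name(dev), sig);

	iounmap(rom);
	pci_disable_rom(dev);		/* as in intel_dc21285_exit() above */
	return 0;
}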
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 49acd417189..5fcfec034a9 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -230,8 +230,7 @@ static int __devinit of_flash_probe(struct of_device *dev,
#ifdef CONFIG_MTD_OF_PARTS
if (err == 0) {
- err = of_mtd_parse_partitions(&dev->dev, info->mtd,
- dp, &info->parts);
+ err = of_mtd_parse_partitions(&dev->dev, dp, &info->parts);
if (err < 0)
return err;
}
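
of_mtd_parse_partitions() loses its struct mtd_info argument here; callers now pass only the device, the flash's device-tree node, and a place to store the parsed partition array. A hedged sketch of a caller against the new signature (register_flash() and its error handling are illustrative, not from this patch):

#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>

static int register_flash(struct device *dev, struct device_node *np,
			  struct mtd_info *mtd)
{
	struct mtd_partition *parts;
	int nr = 0;

#ifdef CONFIG_MTD_OF_PARTS
	nr = of_mtd_parse_partitions(dev, np, &parts);	/* no mtd argument */
	if (nr < 0)
		return nr;
#endif
	if (nr > 0)
		return add_mtd_partitions(mtd, parts, nr);

	return add_mtd_device(mtd);	/* no partition info: whole device */
}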
diff --git a/drivers/mtd/maps/walnut.c b/drivers/mtd/maps/walnut.c
deleted file mode 100644
index e243476c817..00000000000
--- a/drivers/mtd/maps/walnut.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Mapping for Walnut flash
- * (used ebony.c as a "framework")
- *
- * Heikki Lindholm <holindho@infradead.org>
- *
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm4xx.h>
-#include <platforms/4xx/walnut.h>
-
-/* these should be in platforms/4xx/walnut.h ? */
-#define WALNUT_FLASH_ONBD_N(x) (x & 0x02)
-#define WALNUT_FLASH_SRAM_SEL(x) (x & 0x01)
-#define WALNUT_FLASH_LOW 0xFFF00000
-#define WALNUT_FLASH_HIGH 0xFFF80000
-#define WALNUT_FLASH_SIZE 0x80000
-
-static struct mtd_info *flash;
-
-static struct map_info walnut_map = {
- .name = "Walnut flash",
- .size = WALNUT_FLASH_SIZE,
- .bankwidth = 1,
-};
-
-/* Actually, OpenBIOS is the last 128 KiB of the flash - better
- * partitioning could be made */
-static struct mtd_partition walnut_partitions[] = {
- {
- .name = "OpenBIOS",
- .offset = 0x0,
- .size = WALNUT_FLASH_SIZE,
- /*.mask_flags = MTD_WRITEABLE, */ /* force read-only */
- }
-};
-
-int __init init_walnut(void)
-{
- u8 fpga_brds1;
- void *fpga_brds1_adr;
- void *fpga_status_adr;
- unsigned long flash_base;
-
- /* this should already be mapped (platform/4xx/walnut.c) */
- fpga_status_adr = ioremap(WALNUT_FPGA_BASE, 8);
- if (!fpga_status_adr)
- return -ENOMEM;
-
- fpga_brds1_adr = fpga_status_adr+5;
- fpga_brds1 = readb(fpga_brds1_adr);
- /* iounmap(fpga_status_adr); */
-
- if (WALNUT_FLASH_ONBD_N(fpga_brds1)) {
- printk("The on-board flash is disabled (U79 sw 5)!");
- iounmap(fpga_status_adr);
- return -EIO;
- }
- if (WALNUT_FLASH_SRAM_SEL(fpga_brds1))
- flash_base = WALNUT_FLASH_LOW;
- else
- flash_base = WALNUT_FLASH_HIGH;
-
- walnut_map.phys = flash_base;
- walnut_map.virt =
- (void __iomem *)ioremap(flash_base, walnut_map.size);
-
- if (!walnut_map.virt) {
- printk("Failed to ioremap flash.\n");
- iounmap(fpga_status_adr);
- return -EIO;
- }
-
- simple_map_init(&walnut_map);
-
- flash = do_map_probe("jedec_probe", &walnut_map);
- if (flash) {
- flash->owner = THIS_MODULE;
- add_mtd_partitions(flash, walnut_partitions,
- ARRAY_SIZE(walnut_partitions));
- } else {
- printk("map probe failed for flash\n");
- iounmap(fpga_status_adr);
- return -ENXIO;
- }
-
- iounmap(fpga_status_adr);
- return 0;
-}
-
-static void __exit cleanup_walnut(void)
-{
- if (flash) {
- del_mtd_partitions(flash);
- map_destroy(flash);
- }
-
- if (walnut_map.virt) {
- iounmap((void *)walnut_map.virt);
- walnut_map.virt = 0;
- }
-}
-
-module_init(init_walnut);
-module_exit(cleanup_walnut);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Heikki Lindholm <holindho@infradead.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 405GP Walnut boards");
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 1c74762dec8..963840e9b5b 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -348,7 +348,7 @@ static void mtdchar_erase_callback (struct erase_info *instr)
wake_up((wait_queue_head_t *)instr->priv);
}
-#if defined(CONFIG_MTD_OTP) || defined(CONFIG_MTD_ONENAND_OTP)
+#ifdef CONFIG_HAVE_MTD_OTP
static int otp_select_filemode(struct mtd_file_info *mfi, int mode)
{
struct mtd_info *mtd = mfi->mtd;
@@ -665,7 +665,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file,
break;
}
-#if defined(CONFIG_MTD_OTP) || defined(CONFIG_MTD_ONENAND_OTP)
+#ifdef CONFIG_HAVE_MTD_OTP
case OTPSELECT:
{
int mode;
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 2972a5edb73..789842d0e6f 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -444,7 +444,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
return -EINVAL;
}
- instr->fail_addr = 0xffffffff;
+ instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
/* make a local copy of instr to avoid modifying the caller's struct */
erase = kmalloc(sizeof (struct erase_info), GFP_KERNEL);
@@ -493,7 +493,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
/* sanity check: should never happen since
* block alignment has been checked above */
BUG_ON(err == -EINVAL);
- if (erase->fail_addr != 0xffffffff)
+ if (erase->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
instr->fail_addr = erase->fail_addr + offset;
break;
}
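
The 0xffffffff magic in the erase paths becomes the named constant MTD_FAIL_ADDR_UNKNOWN. A short sketch of what a consumer of erase_info looks like under the new spelling (the callback itself is hypothetical; only the constant and the fail_addr convention come from this patch):

#include <linux/mtd/mtd.h>

static void my_erase_callback(struct erase_info *instr)
{
	if (instr->state != MTD_ERASE_FAILED)
		return;

	if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
		printk(KERN_ERR "erase failed at 0x%llx\n",
		       (unsigned long long)instr->fail_addr);
	else
		printk(KERN_ERR "erase failed at an unknown address\n");
}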
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 5a680e1e61f..aebb3b27edb 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -33,6 +33,7 @@
#include <linux/interrupt.h>
#include <linux/mtd/mtd.h>
+#define MTDOOPS_KERNMSG_MAGIC 0x5d005d00
#define OOPS_PAGE_SIZE 4096
static struct mtdoops_context {
@@ -99,7 +100,7 @@ static void mtdoops_inc_counter(struct mtdoops_context *cxt)
int ret;
cxt->nextpage++;
- if (cxt->nextpage > cxt->oops_pages)
+ if (cxt->nextpage >= cxt->oops_pages)
cxt->nextpage = 0;
cxt->nextcount++;
if (cxt->nextcount == 0xffffffff)
@@ -141,7 +142,7 @@ static void mtdoops_workfunc_erase(struct work_struct *work)
mod = (cxt->nextpage * OOPS_PAGE_SIZE) % mtd->erasesize;
if (mod != 0) {
cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / OOPS_PAGE_SIZE);
- if (cxt->nextpage > cxt->oops_pages)
+ if (cxt->nextpage >= cxt->oops_pages)
cxt->nextpage = 0;
}
@@ -158,7 +159,7 @@ badblock:
cxt->nextpage * OOPS_PAGE_SIZE);
i++;
cxt->nextpage = cxt->nextpage + (mtd->erasesize / OOPS_PAGE_SIZE);
- if (cxt->nextpage > cxt->oops_pages)
+ if (cxt->nextpage >= cxt->oops_pages)
cxt->nextpage = 0;
if (i == (cxt->oops_pages / (mtd->erasesize / OOPS_PAGE_SIZE))) {
printk(KERN_ERR "mtdoops: All blocks bad!\n");
@@ -224,40 +225,40 @@ static void find_next_position(struct mtdoops_context *cxt)
{
struct mtd_info *mtd = cxt->mtd;
int ret, page, maxpos = 0;
- u32 count, maxcount = 0xffffffff;
+ u32 count[2], maxcount = 0xffffffff;
size_t retlen;
for (page = 0; page < cxt->oops_pages; page++) {
- ret = mtd->read(mtd, page * OOPS_PAGE_SIZE, 4, &retlen, (u_char *) &count);
- if ((retlen != 4) || ((ret < 0) && (ret != -EUCLEAN))) {
- printk(KERN_ERR "mtdoops: Read failure at %d (%td of 4 read)"
+ ret = mtd->read(mtd, page * OOPS_PAGE_SIZE, 8, &retlen, (u_char *) &count[0]);
+ if ((retlen != 8) || ((ret < 0) && (ret != -EUCLEAN))) {
+ printk(KERN_ERR "mtdoops: Read failure at %d (%td of 8 read)"
", err %d.\n", page * OOPS_PAGE_SIZE, retlen, ret);
continue;
}
- if (count == 0xffffffff)
+ if (count[1] != MTDOOPS_KERNMSG_MAGIC)
+ continue;
+ if (count[0] == 0xffffffff)
continue;
if (maxcount == 0xffffffff) {
- maxcount = count;
+ maxcount = count[0];
maxpos = page;
- } else if ((count < 0x40000000) && (maxcount > 0xc0000000)) {
- maxcount = count;
+ } else if ((count[0] < 0x40000000) && (maxcount > 0xc0000000)) {
+ maxcount = count[0];
maxpos = page;
- } else if ((count > maxcount) && (count < 0xc0000000)) {
- maxcount = count;
+ } else if ((count[0] > maxcount) && (count[0] < 0xc0000000)) {
+ maxcount = count[0];
maxpos = page;
- } else if ((count > maxcount) && (count > 0xc0000000)
+ } else if ((count[0] > maxcount) && (count[0] > 0xc0000000)
&& (maxcount > 0x80000000)) {
- maxcount = count;
+ maxcount = count[0];
maxpos = page;
}
}
if (maxcount == 0xffffffff) {
cxt->nextpage = 0;
cxt->nextcount = 1;
- cxt->ready = 1;
- printk(KERN_DEBUG "mtdoops: Ready %d, %d (first init)\n",
- cxt->nextpage, cxt->nextcount);
+ schedule_work(&cxt->work_erase);
return;
}
@@ -358,8 +359,9 @@ mtdoops_console_write(struct console *co, const char *s, unsigned int count)
if (cxt->writecount == 0) {
u32 *stamp = cxt->oops_buf;
- *stamp = cxt->nextcount;
- cxt->writecount = 4;
+ *stamp++ = cxt->nextcount;
+ *stamp = MTDOOPS_KERNMSG_MAGIC;
+ cxt->writecount = 8;
}
if ((count + cxt->writecount) > OOPS_PAGE_SIZE)
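
Taken together, the mtdoops hunks grow the per-record header from a bare 4-byte counter to 8 bytes: the sequence count followed by MTDOOPS_KERNMSG_MAGIC, which find_next_position() now insists on before trusting a page. A sketch of the resulting on-flash check, assuming hdr[] holds the first two 32-bit words of a 4 KiB record:

#define MTDOOPS_KERNMSG_MAGIC	0x5d005d00

/* Sketch: decide whether a page holds a valid mtdoops record. */
static int mtdoops_page_is_used(const u32 hdr[2])
{
	if (hdr[1] != MTDOOPS_KERNMSG_MAGIC)
		return 0;		/* pre-magic format or garbage */
	if (hdr[0] == 0xffffffff)
		return 0;		/* erased page */
	return 1;			/* hdr[0] is the sequence count */
}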
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 9a06dc93ee0..3728913fa5f 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -214,7 +214,7 @@ static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
instr->addr += part->offset;
ret = part->master->erase(part->master, instr);
if (ret) {
- if (instr->fail_addr != 0xffffffff)
+ if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
instr->fail_addr -= part->offset;
instr->addr -= part->offset;
}
@@ -226,7 +226,7 @@ void mtd_erase_callback(struct erase_info *instr)
if (instr->mtd->erase == part_erase) {
struct mtd_part *part = PART(instr->mtd);
- if (instr->fail_addr != 0xffffffff)
+ if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
instr->fail_addr -= part->offset;
instr->addr -= part->offset;
}
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 41f361c49b3..1c2e9450d66 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -56,6 +56,12 @@ config MTD_NAND_H1900
help
This enables the driver for the iPAQ h1900 flash.
+config MTD_NAND_GPIO
+ tristate "GPIO NAND Flash driver"
+ depends on GENERIC_GPIO && ARM
+ help
+	  This enables a GPIO-based NAND flash driver.
+
config MTD_NAND_SPIA
tristate "NAND Flash device on SPIA board"
depends on ARCH_P720T
@@ -68,12 +74,6 @@ config MTD_NAND_AMS_DELTA
help
Support for NAND flash on Amstrad E3 (Delta).
-config MTD_NAND_TOTO
- tristate "NAND Flash device on TOTO board"
- depends on ARCH_OMAP && BROKEN
- help
- Support for NAND flash on Texas Instruments Toto platform.
-
config MTD_NAND_TS7250
tristate "NAND Flash device on TS-7250 board"
depends on MACH_TS72XX
@@ -163,13 +163,6 @@ config MTD_NAND_S3C2410_HWECC
incorrect ECC generation, and if using these, the default of
software ECC is preferable.
-config MTD_NAND_NDFC
- tristate "NDFC NanD Flash Controller"
- depends on 4xx && !PPC_MERGE
- select MTD_NAND_ECC_SMC
- help
- NDFC Nand Flash Controllers are integrated in IBM/AMCC's 4xx SoCs
-
config MTD_NAND_S3C2410_CLKSTOP
bool "S3C2410 NAND IDLE clock stop"
depends on MTD_NAND_S3C2410
@@ -340,6 +333,13 @@ config MTD_NAND_PXA3xx
This enables the driver for the NAND flash device found on
PXA3xx processors
+config MTD_NAND_PXA3xx_BUILTIN
+ bool "Use builtin definitions for some NAND chips (deprecated)"
+ depends on MTD_NAND_PXA3xx
+ help
+	  This enables built-in definitions for some NAND chips. This
+	  is deprecated in favor of platform-specific data.
+
config MTD_NAND_CM_X270
tristate "Support for NAND Flash on CM-X270 modules"
depends on MTD_NAND && MACH_ARMCORE
@@ -400,10 +400,24 @@ config MTD_NAND_FSL_ELBC
config MTD_NAND_FSL_UPM
tristate "Support for NAND on Freescale UPM"
- depends on MTD_NAND && OF_GPIO && (PPC_83xx || PPC_85xx)
+ depends on MTD_NAND && (PPC_83xx || PPC_85xx)
select FSL_LBC
help
Enables support for NAND Flash chips wired onto Freescale PowerPC
processor localbus with User-Programmable Machine support.
+config MTD_NAND_MXC
+ tristate "MXC NAND support"
+ depends on ARCH_MX2
+ help
+ This enables the driver for the NAND flash controller on the
+ MXC processors.
+
+config MTD_NAND_SH_FLCTL
+ tristate "Support for NAND on Renesas SuperH FLCTL"
+ depends on MTD_NAND && SUPERH && CPU_SUBTYPE_SH7723
+ help
+	  Several Renesas SuperH CPUs have an FLCTL. This option enables
+	  support for NAND Flash using the FLCTL. This driver supports the SH7723.
+
endif # MTD_NAND
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index b786c5da82d..b661586afbf 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_MTD_NAND_IDS) += nand_ids.o
obj-$(CONFIG_MTD_NAND_CAFE) += cafe_nand.o
obj-$(CONFIG_MTD_NAND_SPIA) += spia.o
obj-$(CONFIG_MTD_NAND_AMS_DELTA) += ams-delta.o
-obj-$(CONFIG_MTD_NAND_TOTO) += toto.o
obj-$(CONFIG_MTD_NAND_AUTCPU12) += autcpu12.o
obj-$(CONFIG_MTD_NAND_EDB7312) += edb7312.o
obj-$(CONFIG_MTD_NAND_AU1550) += au1550nd.o
@@ -24,6 +23,7 @@ obj-$(CONFIG_MTD_NAND_NANDSIM) += nandsim.o
obj-$(CONFIG_MTD_NAND_CS553X) += cs553x_nand.o
obj-$(CONFIG_MTD_NAND_NDFC) += ndfc.o
obj-$(CONFIG_MTD_NAND_ATMEL) += atmel_nand.o
+obj-$(CONFIG_MTD_NAND_GPIO) += gpio.o
obj-$(CONFIG_MTD_NAND_CM_X270) += cmx270_nand.o
obj-$(CONFIG_MTD_NAND_BASLER_EXCITE) += excite_nandflash.o
obj-$(CONFIG_MTD_NAND_PXA3xx) += pxa3xx_nand.o
@@ -34,5 +34,7 @@ obj-$(CONFIG_MTD_NAND_PASEMI) += pasemi_nand.o
obj-$(CONFIG_MTD_NAND_ORION) += orion_nand.o
obj-$(CONFIG_MTD_NAND_FSL_ELBC) += fsl_elbc_nand.o
obj-$(CONFIG_MTD_NAND_FSL_UPM) += fsl_upm.o
+obj-$(CONFIG_MTD_NAND_SH_FLCTL) += sh_flctl.o
+obj-$(CONFIG_MTD_NAND_MXC) += mxc_nand.o
nand-objs := nand_base.o nand_bbt.o
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index 3387e0d5076..c98c1570a40 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -174,48 +174,6 @@ static void atmel_write_buf16(struct mtd_info *mtd, const u8 *buf, int len)
}
/*
- * write oob for small pages
- */
-static int atmel_nand_write_oob_512(struct mtd_info *mtd,
- struct nand_chip *chip, int page)
-{
- int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad;
- int eccsize = chip->ecc.size, length = mtd->oobsize;
- int len, pos, status = 0;
- const uint8_t *bufpoi = chip->oob_poi;
-
- pos = eccsize + chunk;
-
- chip->cmdfunc(mtd, NAND_CMD_SEQIN, pos, page);
- len = min_t(int, length, chunk);
- chip->write_buf(mtd, bufpoi, len);
- bufpoi += len;
- length -= len;
- if (length > 0)
- chip->write_buf(mtd, bufpoi, length);
-
- chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
- status = chip->waitfunc(mtd, chip);
-
- return status & NAND_STATUS_FAIL ? -EIO : 0;
-
-}
-
-/*
- * read oob for small pages
- */
-static int atmel_nand_read_oob_512(struct mtd_info *mtd,
- struct nand_chip *chip, int page, int sndcmd)
-{
- if (sndcmd) {
- chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
- sndcmd = 0;
- }
- chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
- return sndcmd;
-}
-
-/*
* Calculate HW ECC
*
* function called after a write
@@ -235,14 +193,14 @@ static int atmel_nand_calculate(struct mtd_info *mtd,
/* get the first 2 ECC bytes */
ecc_value = ecc_readl(host->ecc, PR);
- ecc_code[eccpos[0]] = ecc_value & 0xFF;
- ecc_code[eccpos[1]] = (ecc_value >> 8) & 0xFF;
+ ecc_code[0] = ecc_value & 0xFF;
+ ecc_code[1] = (ecc_value >> 8) & 0xFF;
/* get the last 2 ECC bytes */
ecc_value = ecc_readl(host->ecc, NPR) & ATMEL_ECC_NPARITY;
- ecc_code[eccpos[2]] = ecc_value & 0xFF;
- ecc_code[eccpos[3]] = (ecc_value >> 8) & 0xFF;
+ ecc_code[2] = ecc_value & 0xFF;
+ ecc_code[3] = (ecc_value >> 8) & 0xFF;
return 0;
}
@@ -476,14 +434,12 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
res = -EIO;
goto err_ecc_ioremap;
}
- nand_chip->ecc.mode = NAND_ECC_HW_SYNDROME;
+ nand_chip->ecc.mode = NAND_ECC_HW;
nand_chip->ecc.calculate = atmel_nand_calculate;
nand_chip->ecc.correct = atmel_nand_correct;
nand_chip->ecc.hwctl = atmel_nand_hwctl;
nand_chip->ecc.read_page = atmel_nand_read_page;
nand_chip->ecc.bytes = 4;
- nand_chip->ecc.prepad = 0;
- nand_chip->ecc.postpad = 0;
}
nand_chip->chip_delay = 20; /* 20us command delay time */
@@ -514,7 +470,7 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
goto err_scan_ident;
}
- if (nand_chip->ecc.mode == NAND_ECC_HW_SYNDROME) {
+ if (nand_chip->ecc.mode == NAND_ECC_HW) {
/* ECC is calculated for the whole page (1 step) */
nand_chip->ecc.size = mtd->writesize;
@@ -522,8 +478,6 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
switch (mtd->writesize) {
case 512:
nand_chip->ecc.layout = &atmel_oobinfo_small;
- nand_chip->ecc.read_oob = atmel_nand_read_oob_512;
- nand_chip->ecc.write_oob = atmel_nand_write_oob_512;
ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_528);
break;
case 1024:
diff --git a/drivers/mtd/nand/cs553x_nand.c b/drivers/mtd/nand/cs553x_nand.c
index 3370a800fd3..9f1b451005c 100644
--- a/drivers/mtd/nand/cs553x_nand.c
+++ b/drivers/mtd/nand/cs553x_nand.c
@@ -289,8 +289,10 @@ static int __init cs553x_init(void)
int i;
uint64_t val;
+#ifdef CONFIG_MTD_PARTITIONS
int mtd_parts_nb = 0;
struct mtd_partition *mtd_parts = NULL;
+#endif
/* If the CPU isn't a Geode GX or LX, abort */
if (!is_geode())
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 98ad3cefcaf..4aa5bd6158d 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -918,8 +918,7 @@ static int __devinit fsl_elbc_chip_probe(struct fsl_elbc_ctrl *ctrl,
#ifdef CONFIG_MTD_OF_PARTS
if (ret == 0) {
- ret = of_mtd_parse_partitions(priv->dev, &priv->mtd,
- node, &parts);
+ ret = of_mtd_parse_partitions(priv->dev, node, &parts);
if (ret < 0)
goto err;
}
diff --git a/drivers/mtd/nand/fsl_upm.c b/drivers/mtd/nand/fsl_upm.c
index 1ebfd87f00b..024e3fffd4b 100644
--- a/drivers/mtd/nand/fsl_upm.c
+++ b/drivers/mtd/nand/fsl_upm.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/delay.h>
#include <linux/mtd/nand.h>
#include <linux/mtd/nand_ecc.h>
#include <linux/mtd/partitions.h>
@@ -36,8 +37,6 @@ struct fsl_upm_nand {
uint8_t upm_cmd_offset;
void __iomem *io_base;
int rnb_gpio;
- const uint32_t *wait_pattern;
- const uint32_t *wait_write;
int chip_delay;
};
@@ -61,10 +60,11 @@ static void fun_wait_rnb(struct fsl_upm_nand *fun)
if (fun->rnb_gpio >= 0) {
while (--cnt && !fun_chip_ready(&fun->mtd))
cpu_relax();
+ if (!cnt)
+ dev_err(fun->dev, "tired waiting for RNB\n");
+ } else {
+ ndelay(100);
}
-
- if (!cnt)
- dev_err(fun->dev, "tired waiting for RNB\n");
}
static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
@@ -89,8 +89,7 @@ static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
fsl_upm_run_pattern(&fun->upm, fun->io_base, cmd);
- if (fun->wait_pattern)
- fun_wait_rnb(fun);
+ fun_wait_rnb(fun);
}
static uint8_t fun_read_byte(struct mtd_info *mtd)
@@ -116,14 +115,16 @@ static void fun_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
for (i = 0; i < len; i++) {
out_8(fun->chip.IO_ADDR_W, buf[i]);
- if (fun->wait_write)
- fun_wait_rnb(fun);
+ fun_wait_rnb(fun);
}
}
-static int __devinit fun_chip_init(struct fsl_upm_nand *fun)
+static int __devinit fun_chip_init(struct fsl_upm_nand *fun,
+ const struct device_node *upm_np,
+ const struct resource *io_res)
{
int ret;
+ struct device_node *flash_np;
#ifdef CONFIG_MTD_PARTITIONS
static const char *part_types[] = { "cmdlinepart", NULL, };
#endif
@@ -143,18 +144,37 @@ static int __devinit fun_chip_init(struct fsl_upm_nand *fun)
fun->mtd.priv = &fun->chip;
fun->mtd.owner = THIS_MODULE;
+ flash_np = of_get_next_child(upm_np, NULL);
+ if (!flash_np)
+ return -ENODEV;
+
+ fun->mtd.name = kasprintf(GFP_KERNEL, "%x.%s", io_res->start,
+ flash_np->name);
+ if (!fun->mtd.name) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
ret = nand_scan(&fun->mtd, 1);
if (ret)
- return ret;
-
- fun->mtd.name = fun->dev->bus_id;
+ goto err;
#ifdef CONFIG_MTD_PARTITIONS
ret = parse_mtd_partitions(&fun->mtd, part_types, &fun->parts, 0);
+
+#ifdef CONFIG_MTD_OF_PARTS
+ if (ret == 0)
+ ret = of_mtd_parse_partitions(fun->dev, &fun->mtd,
+ flash_np, &fun->parts);
+#endif
if (ret > 0)
- return add_mtd_partitions(&fun->mtd, fun->parts, ret);
+ ret = add_mtd_partitions(&fun->mtd, fun->parts, ret);
+ else
#endif
- return add_mtd_device(&fun->mtd);
+ ret = add_mtd_device(&fun->mtd);
+err:
+ of_node_put(flash_np);
+ return ret;
}
static int __devinit fun_probe(struct of_device *ofdev,
@@ -211,6 +231,12 @@ static int __devinit fun_probe(struct of_device *ofdev,
goto err2;
}
+ prop = of_get_property(ofdev->node, "chip-delay", NULL);
+ if (prop)
+ fun->chip_delay = *prop;
+ else
+ fun->chip_delay = 50;
+
fun->io_base = devm_ioremap_nocache(&ofdev->dev, io_res.start,
io_res.end - io_res.start + 1);
if (!fun->io_base) {
@@ -220,17 +246,8 @@ static int __devinit fun_probe(struct of_device *ofdev,
fun->dev = &ofdev->dev;
fun->last_ctrl = NAND_CLE;
- fun->wait_pattern = of_get_property(ofdev->node, "fsl,wait-pattern",
- NULL);
- fun->wait_write = of_get_property(ofdev->node, "fsl,wait-write", NULL);
-
- prop = of_get_property(ofdev->node, "chip-delay", NULL);
- if (prop)
- fun->chip_delay = *prop;
- else
- fun->chip_delay = 50;
- ret = fun_chip_init(fun);
+ ret = fun_chip_init(fun, ofdev->node, &io_res);
if (ret)
goto err2;
@@ -251,6 +268,7 @@ static int __devexit fun_remove(struct of_device *ofdev)
struct fsl_upm_nand *fun = dev_get_drvdata(&ofdev->dev);
nand_release(&fun->mtd);
+ kfree(fun->mtd.name);
if (fun->rnb_gpio >= 0)
gpio_free(fun->rnb_gpio);
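
The fsl_upm hunks drop the fsl,wait-pattern/fsl,wait-write properties (RNB is now always waited for) and move the optional chip-delay lookup earlier in probe. The optional-property-with-default idiom used for chip-delay, as a standalone sketch (get_chip_delay() is an illustrative name):

#include <linux/of.h>

static int get_chip_delay(struct device_node *np)
{
	const u32 *prop = of_get_property(np, "chip-delay", NULL);

	return prop ? *prop : 50;	/* default to 50 us when absent */
}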
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
new file mode 100644
index 00000000000..8f902e75aa8
--- /dev/null
+++ b/drivers/mtd/nand/gpio.c
@@ -0,0 +1,375 @@
+/*
+ * drivers/mtd/nand/gpio.c
+ *
+ * Updated, and converted to generic GPIO based driver by Russell King.
+ *
+ * Written by Ben Dooks <ben@simtec.co.uk>
+ * Based on 2.4 version by Mark Whittaker
+ *
+ * © 2004 Simtec Electronics
+ *
+ * Device driver for NAND connected via GPIO
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/nand-gpio.h>
+
+struct gpiomtd {
+ void __iomem *io_sync;
+ struct mtd_info mtd_info;
+ struct nand_chip nand_chip;
+ struct gpio_nand_platdata plat;
+};
+
+#define gpio_nand_getpriv(x) container_of(x, struct gpiomtd, mtd_info)
+
+
+#ifdef CONFIG_ARM
+/* gpio_nand_dosync()
+ *
+ * Make sure the GPIO state changes occur in-order with writes to NAND
+ * memory region.
+ * Needed on PXA due to bus-reordering within the SoC itself (see the
+ * section on I/O ordering in the PXA manual, section 2.3, p. 35).
+ */
+static void gpio_nand_dosync(struct gpiomtd *gpiomtd)
+{
+ unsigned long tmp;
+
+ if (gpiomtd->io_sync) {
+ /*
+ * Linux memory barriers don't cater for what's required here.
+ * What's required is what's here - a read from a separate
+ * region with a dependency on that read.
+ */
+ tmp = readl(gpiomtd->io_sync);
+ asm volatile("mov %1, %0\n" : "=r" (tmp) : "r" (tmp));
+ }
+}
+#else
+static inline void gpio_nand_dosync(struct gpiomtd *gpiomtd) {}
+#endif
+
+static void gpio_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+{
+ struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
+
+ gpio_nand_dosync(gpiomtd);
+
+ if (ctrl & NAND_CTRL_CHANGE) {
+ gpio_set_value(gpiomtd->plat.gpio_nce, !(ctrl & NAND_NCE));
+ gpio_set_value(gpiomtd->plat.gpio_cle, !!(ctrl & NAND_CLE));
+ gpio_set_value(gpiomtd->plat.gpio_ale, !!(ctrl & NAND_ALE));
+ gpio_nand_dosync(gpiomtd);
+ }
+ if (cmd == NAND_CMD_NONE)
+ return;
+
+ writeb(cmd, gpiomtd->nand_chip.IO_ADDR_W);
+ gpio_nand_dosync(gpiomtd);
+}
+
+static void gpio_nand_writebuf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+ struct nand_chip *this = mtd->priv;
+
+ writesb(this->IO_ADDR_W, buf, len);
+}
+
+static void gpio_nand_readbuf(struct mtd_info *mtd, u_char *buf, int len)
+{
+ struct nand_chip *this = mtd->priv;
+
+ readsb(this->IO_ADDR_R, buf, len);
+}
+
+static int gpio_nand_verifybuf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+ struct nand_chip *this = mtd->priv;
+ unsigned char read, *p = (unsigned char *) buf;
+ int i, err = 0;
+
+ for (i = 0; i < len; i++) {
+ read = readb(this->IO_ADDR_R);
+ if (read != p[i]) {
+ pr_debug("%s: err at %d (read %04x vs %04x)\n",
+ __func__, i, read, p[i]);
+ err = -EFAULT;
+ }
+ }
+ return err;
+}
+
+static void gpio_nand_writebuf16(struct mtd_info *mtd, const u_char *buf,
+ int len)
+{
+ struct nand_chip *this = mtd->priv;
+
+ if (IS_ALIGNED((unsigned long)buf, 2)) {
+ writesw(this->IO_ADDR_W, buf, len>>1);
+ } else {
+ int i;
+ unsigned short *ptr = (unsigned short *)buf;
+
+ for (i = 0; i < len; i += 2, ptr++)
+ writew(*ptr, this->IO_ADDR_W);
+ }
+}
+
+static void gpio_nand_readbuf16(struct mtd_info *mtd, u_char *buf, int len)
+{
+ struct nand_chip *this = mtd->priv;
+
+ if (IS_ALIGNED((unsigned long)buf, 2)) {
+ readsw(this->IO_ADDR_R, buf, len>>1);
+ } else {
+ int i;
+ unsigned short *ptr = (unsigned short *)buf;
+
+ for (i = 0; i < len; i += 2, ptr++)
+ *ptr = readw(this->IO_ADDR_R);
+ }
+}
+
+static int gpio_nand_verifybuf16(struct mtd_info *mtd, const u_char *buf,
+ int len)
+{
+ struct nand_chip *this = mtd->priv;
+ unsigned short read, *p = (unsigned short *) buf;
+ int i, err = 0;
+ len >>= 1;
+
+ for (i = 0; i < len; i++) {
+ read = readw(this->IO_ADDR_R);
+ if (read != p[i]) {
+ pr_debug("%s: err at %d (read %04x vs %04x)\n",
+ __func__, i, read, p[i]);
+ err = -EFAULT;
+ }
+ }
+ return err;
+}
+
+
+static int gpio_nand_devready(struct mtd_info *mtd)
+{
+ struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
+ return gpio_get_value(gpiomtd->plat.gpio_rdy);
+}
+
+static int __devexit gpio_nand_remove(struct platform_device *dev)
+{
+ struct gpiomtd *gpiomtd = platform_get_drvdata(dev);
+ struct resource *res;
+
+ nand_release(&gpiomtd->mtd_info);
+
+ res = platform_get_resource(dev, IORESOURCE_MEM, 1);
+ iounmap(gpiomtd->io_sync);
+ if (res)
+ release_mem_region(res->start, res->end - res->start + 1);
+
+ res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+ iounmap(gpiomtd->nand_chip.IO_ADDR_R);
+ release_mem_region(res->start, res->end - res->start + 1);
+
+ if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+ gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
+ gpio_set_value(gpiomtd->plat.gpio_nce, 1);
+
+ gpio_free(gpiomtd->plat.gpio_cle);
+ gpio_free(gpiomtd->plat.gpio_ale);
+ gpio_free(gpiomtd->plat.gpio_nce);
+ if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+ gpio_free(gpiomtd->plat.gpio_nwp);
+ gpio_free(gpiomtd->plat.gpio_rdy);
+
+ kfree(gpiomtd);
+
+ return 0;
+}
+
+static void __iomem *request_and_remap(struct resource *res, size_t size,
+ const char *name, int *err)
+{
+ void __iomem *ptr;
+
+ if (!request_mem_region(res->start, res->end - res->start + 1, name)) {
+ *err = -EBUSY;
+ return NULL;
+ }
+
+ ptr = ioremap(res->start, size);
+ if (!ptr) {
+ release_mem_region(res->start, res->end - res->start + 1);
+ *err = -ENOMEM;
+ }
+ return ptr;
+}
+
+static int __devinit gpio_nand_probe(struct platform_device *dev)
+{
+ struct gpiomtd *gpiomtd;
+ struct nand_chip *this;
+ struct resource *res0, *res1;
+ int ret;
+
+ if (!dev->dev.platform_data)
+ return -EINVAL;
+
+ res0 = platform_get_resource(dev, IORESOURCE_MEM, 0);
+ if (!res0)
+ return -EINVAL;
+
+ gpiomtd = kzalloc(sizeof(*gpiomtd), GFP_KERNEL);
+ if (gpiomtd == NULL) {
+ dev_err(&dev->dev, "failed to create NAND MTD\n");
+ return -ENOMEM;
+ }
+
+ this = &gpiomtd->nand_chip;
+ this->IO_ADDR_R = request_and_remap(res0, 2, "NAND", &ret);
+ if (!this->IO_ADDR_R) {
+ dev_err(&dev->dev, "unable to map NAND\n");
+ goto err_map;
+ }
+
+ res1 = platform_get_resource(dev, IORESOURCE_MEM, 1);
+ if (res1) {
+ gpiomtd->io_sync = request_and_remap(res1, 4, "NAND sync", &ret);
+ if (!gpiomtd->io_sync) {
+ dev_err(&dev->dev, "unable to map sync NAND\n");
+ goto err_sync;
+ }
+ }
+
+ memcpy(&gpiomtd->plat, dev->dev.platform_data, sizeof(gpiomtd->plat));
+
+ ret = gpio_request(gpiomtd->plat.gpio_nce, "NAND NCE");
+ if (ret)
+ goto err_nce;
+ gpio_direction_output(gpiomtd->plat.gpio_nce, 1);
+ if (gpio_is_valid(gpiomtd->plat.gpio_nwp)) {
+ ret = gpio_request(gpiomtd->plat.gpio_nwp, "NAND NWP");
+ if (ret)
+ goto err_nwp;
+ gpio_direction_output(gpiomtd->plat.gpio_nwp, 1);
+ }
+ ret = gpio_request(gpiomtd->plat.gpio_ale, "NAND ALE");
+ if (ret)
+ goto err_ale;
+ gpio_direction_output(gpiomtd->plat.gpio_ale, 0);
+ ret = gpio_request(gpiomtd->plat.gpio_cle, "NAND CLE");
+ if (ret)
+ goto err_cle;
+ gpio_direction_output(gpiomtd->plat.gpio_cle, 0);
+ ret = gpio_request(gpiomtd->plat.gpio_rdy, "NAND RDY");
+ if (ret)
+ goto err_rdy;
+ gpio_direction_input(gpiomtd->plat.gpio_rdy);
+
+
+ this->IO_ADDR_W = this->IO_ADDR_R;
+ this->ecc.mode = NAND_ECC_SOFT;
+ this->options = gpiomtd->plat.options;
+ this->chip_delay = gpiomtd->plat.chip_delay;
+
+ /* install our routines */
+ this->cmd_ctrl = gpio_nand_cmd_ctrl;
+ this->dev_ready = gpio_nand_devready;
+
+ if (this->options & NAND_BUSWIDTH_16) {
+ this->read_buf = gpio_nand_readbuf16;
+ this->write_buf = gpio_nand_writebuf16;
+ this->verify_buf = gpio_nand_verifybuf16;
+ } else {
+ this->read_buf = gpio_nand_readbuf;
+ this->write_buf = gpio_nand_writebuf;
+ this->verify_buf = gpio_nand_verifybuf;
+ }
+
+ /* set the mtd private data for the nand driver */
+ gpiomtd->mtd_info.priv = this;
+ gpiomtd->mtd_info.owner = THIS_MODULE;
+
+ if (nand_scan(&gpiomtd->mtd_info, 1)) {
+ dev_err(&dev->dev, "no nand chips found?\n");
+ ret = -ENXIO;
+ goto err_wp;
+ }
+
+ if (gpiomtd->plat.adjust_parts)
+ gpiomtd->plat.adjust_parts(&gpiomtd->plat,
+ gpiomtd->mtd_info.size);
+
+ add_mtd_partitions(&gpiomtd->mtd_info, gpiomtd->plat.parts,
+ gpiomtd->plat.num_parts);
+ platform_set_drvdata(dev, gpiomtd);
+
+ return 0;
+
+err_wp:
+ if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+ gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
+ gpio_free(gpiomtd->plat.gpio_rdy);
+err_rdy:
+ gpio_free(gpiomtd->plat.gpio_cle);
+err_cle:
+ gpio_free(gpiomtd->plat.gpio_ale);
+err_ale:
+ if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+ gpio_free(gpiomtd->plat.gpio_nwp);
+err_nwp:
+ gpio_free(gpiomtd->plat.gpio_nce);
+err_nce:
+ iounmap(gpiomtd->io_sync);
+ if (res1)
+ release_mem_region(res1->start, res1->end - res1->start + 1);
+err_sync:
+ iounmap(gpiomtd->nand_chip.IO_ADDR_R);
+ release_mem_region(res0->start, res0->end - res0->start + 1);
+err_map:
+ kfree(gpiomtd);
+ return ret;
+}
+
+static struct platform_driver gpio_nand_driver = {
+ .probe = gpio_nand_probe,
+ .remove = gpio_nand_remove,
+ .driver = {
+ .name = "gpio-nand",
+ },
+};
+
+static int __init gpio_nand_init(void)
+{
+ printk(KERN_INFO "GPIO NAND driver, © 2004 Simtec Electronics\n");
+
+ return platform_driver_register(&gpio_nand_driver);
+}
+
+static void __exit gpio_nand_exit(void)
+{
+ platform_driver_unregister(&gpio_nand_driver);
+}
+
+module_init(gpio_nand_init);
+module_exit(gpio_nand_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
+MODULE_DESCRIPTION("GPIO NAND Driver");
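
Since gpio.c is a pure platform driver, a board has to supply both the MMIO resources and a struct gpio_nand_platdata. A hedged sketch of such a board hookup, using only fields the probe routine above actually consumes; every GPIO number, address, and partition below is made up:

#include <linux/platform_device.h>
#include <linux/mtd/partitions.h>
#include <linux/mtd/nand-gpio.h>

static struct mtd_partition board_nand_parts[] = {
	{ .name = "root", .offset = 0, .size = MTDPART_SIZ_FULL },
};

static struct gpio_nand_platdata board_nand_pdata = {
	.gpio_nce	= 10,			/* hypothetical GPIO numbers */
	.gpio_cle	= 11,
	.gpio_ale	= 12,
	.gpio_rdy	= 13,
	.gpio_nwp	= -1,			/* not wired; gpio_is_valid() fails */
	.parts		= board_nand_parts,
	.num_parts	= ARRAY_SIZE(board_nand_parts),
	.chip_delay	= 25,
};

static struct resource board_nand_resources[] = {
	{
		.start	= 0x40000000,		/* hypothetical data latch */
		.end	= 0x40000003,
		.flags	= IORESOURCE_MEM,
	},
	/* an optional second IORESOURCE_MEM entry would become io_sync */
};

static struct platform_device board_nand_device = {
	.name		= "gpio-nand",		/* matches the driver name above */
	.id		= -1,
	.resource	= board_nand_resources,
	.num_resources	= ARRAY_SIZE(board_nand_resources),
	.dev		= {
		.platform_data	= &board_nand_pdata,
	},
};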
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
new file mode 100644
index 00000000000..21fd4f1c480
--- /dev/null
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -0,0 +1,1077 @@
+/*
+ * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Sascha Hauer, kernel@pengutronix.de
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+
+#include <asm/mach/flash.h>
+#include <mach/mxc_nand.h>
+
+#define DRIVER_NAME "mxc_nand"
+
+/* Addresses for NFC registers */
+#define NFC_BUF_SIZE 0xE00
+#define NFC_BUF_ADDR 0xE04
+#define NFC_FLASH_ADDR 0xE06
+#define NFC_FLASH_CMD 0xE08
+#define NFC_CONFIG 0xE0A
+#define NFC_ECC_STATUS_RESULT 0xE0C
+#define NFC_RSLTMAIN_AREA 0xE0E
+#define NFC_RSLTSPARE_AREA 0xE10
+#define NFC_WRPROT 0xE12
+#define NFC_UNLOCKSTART_BLKADDR 0xE14
+#define NFC_UNLOCKEND_BLKADDR 0xE16
+#define NFC_NF_WRPRST 0xE18
+#define NFC_CONFIG1 0xE1A
+#define NFC_CONFIG2 0xE1C
+
+/* Addresses for NFC RAM BUFFER Main area 0 */
+#define MAIN_AREA0 0x000
+#define MAIN_AREA1 0x200
+#define MAIN_AREA2 0x400
+#define MAIN_AREA3 0x600
+
+/* Addresses for NFC SPARE BUFFER Spare area 0 */
+#define SPARE_AREA0 0x800
+#define SPARE_AREA1 0x810
+#define SPARE_AREA2 0x820
+#define SPARE_AREA3 0x830
+
+/* Set INT to 0, FCMD to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Command operation */
+#define NFC_CMD 0x1
+
+/* Set INT to 0, FADD to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Address operation */
+#define NFC_ADDR 0x2
+
+/* Set INT to 0, FDI to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Input operation */
+#define NFC_INPUT 0x4
+
+/* Set INT to 0, FDO to 001, rest to 0 in NFC_CONFIG2 Register
+ * for Data Output operation */
+#define NFC_OUTPUT 0x8
+
+/* Set INT to 0, FDO to 010, rest to 0 in NFC_CONFIG2 Register
+ * for Read ID operation */
+#define NFC_ID 0x10
+
+/* Set INT to 0, FDO to 100, rest to 0 in NFC_CONFIG2 Register
+ * for Read Status operation */
+#define NFC_STATUS 0x20
+
+/* The INT bit of the NFC_CONFIG2 Register; set by the NFC once an
+ * operation completes */
+#define NFC_INT 0x8000
+
+#define NFC_SP_EN (1 << 2)
+#define NFC_ECC_EN (1 << 3)
+#define NFC_INT_MSK (1 << 4)
+#define NFC_BIG (1 << 5)
+#define NFC_RST (1 << 6)
+#define NFC_CE (1 << 7)
+#define NFC_ONE_CYCLE (1 << 8)
+
+struct mxc_nand_host {
+ struct mtd_info mtd;
+ struct nand_chip nand;
+ struct mtd_partition *parts;
+ struct device *dev;
+
+ void __iomem *regs;
+ int spare_only;
+ int status_request;
+ int pagesize_2k;
+ uint16_t col_addr;
+ struct clk *clk;
+ int clk_act;
+ int irq;
+
+ wait_queue_head_t irq_waitq;
+};
+
+/* Define delays in microsec for NAND device operations */
+#define TROP_US_DELAY 2000
+/* Macros to get byte and bit positions of ECC */
+#define COLPOS(x) ((x) >> 3)
+#define BITPOS(x) ((x) & 0xf)
+
+/* Define single bit Error positions in Main & Spare area */
+#define MAIN_SINGLEBIT_ERROR 0x4
+#define SPARE_SINGLEBIT_ERROR 0x1
+
+/* OOB placement block for use with hardware ecc generation */
+static struct nand_ecclayout nand_hw_eccoob_8 = {
+ .eccbytes = 5,
+ .eccpos = {6, 7, 8, 9, 10},
+ .oobfree = {{0, 5}, {11, 5}, }
+};
+
+static struct nand_ecclayout nand_hw_eccoob_16 = {
+ .eccbytes = 5,
+ .eccpos = {6, 7, 8, 9, 10},
+ .oobfree = {{0, 6}, {12, 4}, }
+};
+
+#ifdef CONFIG_MTD_PARTITIONS
+static const char *part_probes[] = { "RedBoot", "cmdlinepart", NULL };
+#endif
+
+static irqreturn_t mxc_nfc_irq(int irq, void *dev_id)
+{
+ struct mxc_nand_host *host = dev_id;
+
+ uint16_t tmp;
+
+ tmp = readw(host->regs + NFC_CONFIG1);
+ tmp |= NFC_INT_MSK; /* Disable interrupt */
+ writew(tmp, host->regs + NFC_CONFIG1);
+
+ wake_up(&host->irq_waitq);
+
+ return IRQ_HANDLED;
+}
+
+/* This function polls the NANDFC to wait for the basic operation to
+ * complete, by checking the INT bit of the config2 register.
+ */
+static void wait_op_done(struct mxc_nand_host *host, int max_retries,
+ uint16_t param, int useirq)
+{
+ uint32_t tmp;
+
+ if (useirq) {
+ if ((readw(host->regs + NFC_CONFIG2) & NFC_INT) == 0) {
+
+ tmp = readw(host->regs + NFC_CONFIG1);
+ tmp &= ~NFC_INT_MSK; /* Enable interrupt */
+ writew(tmp, host->regs + NFC_CONFIG1);
+
+ wait_event(host->irq_waitq,
+ readw(host->regs + NFC_CONFIG2) & NFC_INT);
+
+ tmp = readw(host->regs + NFC_CONFIG2);
+ tmp &= ~NFC_INT;
+ writew(tmp, host->regs + NFC_CONFIG2);
+ }
+ } else {
+ while (max_retries-- > 0) {
+ if (readw(host->regs + NFC_CONFIG2) & NFC_INT) {
+ tmp = readw(host->regs + NFC_CONFIG2);
+ tmp &= ~NFC_INT;
+ writew(tmp, host->regs + NFC_CONFIG2);
+ break;
+ }
+ udelay(1);
+ }
+ if (max_retries <= 0)
+ DEBUG(MTD_DEBUG_LEVEL0, "%s(%d): INT not set\n",
+ __func__, param);
+ }
+}
+
+/* This function issues the specified command to the NAND device and
+ * waits for completion. */
+static void send_cmd(struct mxc_nand_host *host, uint16_t cmd, int useirq)
+{
+ DEBUG(MTD_DEBUG_LEVEL3, "send_cmd(host, 0x%x, %d)\n", cmd, useirq);
+
+ writew(cmd, host->regs + NFC_FLASH_CMD);
+ writew(NFC_CMD, host->regs + NFC_CONFIG2);
+
+ /* Wait for operation to complete */
+ wait_op_done(host, TROP_US_DELAY, cmd, useirq);
+}
+
+/* This function sends an address (or partial address) to the
+ * NAND device. The address is used to select the source/destination for
+ * a NAND command. */
+static void send_addr(struct mxc_nand_host *host, uint16_t addr, int islast)
+{
+ DEBUG(MTD_DEBUG_LEVEL3, "send_addr(host, 0x%x %d)\n", addr, islast);
+
+ writew(addr, host->regs + NFC_FLASH_ADDR);
+ writew(NFC_ADDR, host->regs + NFC_CONFIG2);
+
+ /* Wait for operation to complete */
+ wait_op_done(host, TROP_US_DELAY, addr, islast);
+}
+
+/* This function requests the NANDFC to initiate the transfer
+ * of the data currently in the NANDFC RAM buffer to the NAND device. */
+static void send_prog_page(struct mxc_nand_host *host, uint8_t buf_id,
+ int spare_only)
+{
+ DEBUG(MTD_DEBUG_LEVEL3, "send_prog_page (%d)\n", spare_only);
+
+ /* NANDFC buffer 0 is used for page read/write */
+ writew(buf_id, host->regs + NFC_BUF_ADDR);
+
+ /* Configure spare or page+spare access */
+ if (!host->pagesize_2k) {
+ uint16_t config1 = readw(host->regs + NFC_CONFIG1);
+ if (spare_only)
+ config1 |= NFC_SP_EN;
+ else
+ config1 &= ~(NFC_SP_EN);
+ writew(config1, host->regs + NFC_CONFIG1);
+ }
+
+ writew(NFC_INPUT, host->regs + NFC_CONFIG2);
+
+ /* Wait for operation to complete */
+ wait_op_done(host, TROP_US_DELAY, spare_only, true);
+}
+
+/* Requests the NANDFC to initiate the transfer of data from the
+ * NAND device into the NANDFC RAM buffer. */
+static void send_read_page(struct mxc_nand_host *host, uint8_t buf_id,
+ int spare_only)
+{
+ DEBUG(MTD_DEBUG_LEVEL3, "send_read_page (%d)\n", spare_only);
+
+ /* NANDFC buffer 0 is used for page read/write */
+ writew(buf_id, host->regs + NFC_BUF_ADDR);
+
+ /* Configure spare or page+spare access */
+ if (!host->pagesize_2k) {
+ uint32_t config1 = readw(host->regs + NFC_CONFIG1);
+ if (spare_only)
+ config1 |= NFC_SP_EN;
+ else
+ config1 &= ~NFC_SP_EN;
+ writew(config1, host->regs + NFC_CONFIG1);
+ }
+
+ writew(NFC_OUTPUT, host->regs + NFC_CONFIG2);
+
+ /* Wait for operation to complete */
+ wait_op_done(host, TROP_US_DELAY, spare_only, true);
+}
+
+/* Request the NANDFC to perform a read of the NAND device ID. */
+static void send_read_id(struct mxc_nand_host *host)
+{
+ struct nand_chip *this = &host->nand;
+ uint16_t tmp;
+
+ /* NANDFC buffer 0 is used for device ID output */
+ writew(0x0, host->regs + NFC_BUF_ADDR);
+
+ /* Read ID into main buffer */
+ tmp = readw(host->regs + NFC_CONFIG1);
+ tmp &= ~NFC_SP_EN;
+ writew(tmp, host->regs + NFC_CONFIG1);
+
+ writew(NFC_ID, host->regs + NFC_CONFIG2);
+
+ /* Wait for operation to complete */
+ wait_op_done(host, TROP_US_DELAY, 0, true);
+
+ if (this->options & NAND_BUSWIDTH_16) {
+ void __iomem *main_buf = host->regs + MAIN_AREA0;
+ /* compress the ID info */
+ writeb(readb(main_buf + 2), main_buf + 1);
+ writeb(readb(main_buf + 4), main_buf + 2);
+ writeb(readb(main_buf + 6), main_buf + 3);
+ writeb(readb(main_buf + 8), main_buf + 4);
+ writeb(readb(main_buf + 10), main_buf + 5);
+ }
+}
+
+/* This function requests the NANDFC to perform a read of the
+ * NAND device status and returns the current status. */
+static uint16_t get_dev_status(struct mxc_nand_host *host)
+{
+ void __iomem *main_buf = host->regs + MAIN_AREA1;
+ uint32_t store;
+ uint16_t ret, tmp;
+ /* Issue status request to NAND device */
+
+	/* save the first word of main area 1; it is restored below */
+ store = readl(main_buf);
+ /* NANDFC buffer 1 is used for device status to prevent
+ * corruption of read/write buffer on status requests. */
+ writew(1, host->regs + NFC_BUF_ADDR);
+
+ /* Read status into main buffer */
+ tmp = readw(host->regs + NFC_CONFIG1);
+ tmp &= ~NFC_SP_EN;
+ writew(tmp, host->regs + NFC_CONFIG1);
+
+ writew(NFC_STATUS, host->regs + NFC_CONFIG2);
+
+ /* Wait for operation to complete */
+ wait_op_done(host, TROP_US_DELAY, 0, true);
+
+	/* The status is placed in the first word of the main buffer */
+	/* get the status, then restore the area 1 data */
+ ret = readw(main_buf);
+ writel(store, main_buf);
+
+ return ret;
+}
+
+/* This function is used by the upper layer to check if the device is ready */
+static int mxc_nand_dev_ready(struct mtd_info *mtd)
+{
+ /*
+ * NFC handles R/B internally. Therefore, this function
+ * always returns status as ready.
+ */
+ return 1;
+}
+
+static void mxc_nand_enable_hwecc(struct mtd_info *mtd, int mode)
+{
+ /*
+ * If HW ECC is enabled, we turn it on during init. There is
+ * no need to enable again here.
+ */
+}
+
+static int mxc_nand_correct_data(struct mtd_info *mtd, u_char *dat,
+ u_char *read_ecc, u_char *calc_ecc)
+{
+ struct nand_chip *nand_chip = mtd->priv;
+ struct mxc_nand_host *host = nand_chip->priv;
+
+ /*
+ * 1-Bit errors are automatically corrected in HW. No need for
+ * additional correction. 2-Bit errors cannot be corrected by
+ * HW ECC, so we need to return failure
+ */
+ uint16_t ecc_status = readw(host->regs + NFC_ECC_STATUS_RESULT);
+
+ if (((ecc_status & 0x3) == 2) || ((ecc_status >> 2) == 2)) {
+ DEBUG(MTD_DEBUG_LEVEL0,
+ "MXC_NAND: HWECC uncorrectable 2-bit ECC error\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int mxc_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
+ u_char *ecc_code)
+{
+ return 0;
+}
+
+static u_char mxc_nand_read_byte(struct mtd_info *mtd)
+{
+ struct nand_chip *nand_chip = mtd->priv;
+ struct mxc_nand_host *host = nand_chip->priv;
+ uint8_t ret = 0;
+ uint16_t col, rd_word;
+ uint16_t __iomem *main_buf = host->regs + MAIN_AREA0;
+ uint16_t __iomem *spare_buf = host->regs + SPARE_AREA0;
+
+ /* Check for status request */
+ if (host->status_request)
+ return get_dev_status(host) & 0xFF;
+
+ /* Get column for 16-bit access */
+ col = host->col_addr >> 1;
+
+ /* If we are accessing the spare region */
+ if (host->spare_only)
+ rd_word = readw(&spare_buf[col]);
+ else
+ rd_word = readw(&main_buf[col]);
+
+ /* Pick upper/lower byte of word from RAM buffer */
+ if (host->col_addr & 0x1)
+ ret = (rd_word >> 8) & 0xFF;
+ else
+ ret = rd_word & 0xFF;
+
+ /* Update saved column address */
+ host->col_addr++;
+
+ return ret;
+}
+
+static uint16_t mxc_nand_read_word(struct mtd_info *mtd)
+{
+ struct nand_chip *nand_chip = mtd->priv;
+ struct mxc_nand_host *host = nand_chip->priv;
+ uint16_t col, rd_word, ret;
+ uint16_t __iomem *p;
+
+ DEBUG(MTD_DEBUG_LEVEL3,
+ "mxc_nand_read_word(col = %d)\n", host->col_addr);
+
+ col = host->col_addr;
+ /* Adjust saved column address */
+ if (col < mtd->writesize && host->spare_only)
+ col += mtd->writesize;
+
+ if (col < mtd->writesize)
+ p = (host->regs + MAIN_AREA0) + (col >> 1);
+ else
+ p = (host->regs + SPARE_AREA0) + ((col - mtd->writesize) >> 1);
+
+ if (col & 1) {
+ rd_word = readw(p);
+ ret = (rd_word >> 8) & 0xff;
+ rd_word = readw(&p[1]);
+ ret |= (rd_word << 8) & 0xff00;
+
+ } else
+ ret = readw(p);
+
+ /* Update saved column address */
+ host->col_addr = col + 2;
+
+ return ret;
+}
+
+/* Write data of length len from buffer buf to the NAND Flash. The data
+ * is first copied to the RAM buffer; after the Data Input Operation by
+ * the NFC, it is written to the NAND Flash. */
+static void mxc_nand_write_buf(struct mtd_info *mtd,
+ const u_char *buf, int len)
+{
+ struct nand_chip *nand_chip = mtd->priv;
+ struct mxc_nand_host *host = nand_chip->priv;
+ int n, col, i = 0;
+
+ DEBUG(MTD_DEBUG_LEVEL3,
+ "mxc_nand_write_buf(col = %d, len = %d)\n", host->col_addr,
+ len);
+
+ col = host->col_addr;
+
+ /* Adjust saved column address */
+ if (col < mtd->writesize && host->spare_only)
+ col += mtd->writesize;
+
+ n = mtd->writesize + mtd->oobsize - col;
+ n = min(len, n);
+
+ DEBUG(MTD_DEBUG_LEVEL3,
+ "%s:%d: col = %d, n = %d\n", __func__, __LINE__, col, n);
+
+ while (n) {
+ void __iomem *p;
+
+ if (col < mtd->writesize)
+ p = host->regs + MAIN_AREA0 + (col & ~3);
+ else
+ p = host->regs + SPARE_AREA0 -
+ mtd->writesize + (col & ~3);
+
+ DEBUG(MTD_DEBUG_LEVEL3, "%s:%d: p = %p\n", __func__,
+ __LINE__, p);
+
+ if (((col | (int)&buf[i]) & 3) || n < 16) {
+ uint32_t data = 0;
+
+ if (col & 3 || n < 4)
+ data = readl(p);
+
+ switch (col & 3) {
+ case 0:
+ if (n) {
+ data = (data & 0xffffff00) |
+ (buf[i++] << 0);
+ n--;
+ col++;
+ }
+ case 1:
+ if (n) {
+ data = (data & 0xffff00ff) |
+ (buf[i++] << 8);
+ n--;
+ col++;
+ }
+ case 2:
+ if (n) {
+ data = (data & 0xff00ffff) |
+ (buf[i++] << 16);
+ n--;
+ col++;
+ }
+ case 3:
+ if (n) {
+ data = (data & 0x00ffffff) |
+ (buf[i++] << 24);
+ n--;
+ col++;
+ }
+ }
+
+ writel(data, p);
+ } else {
+ int m = mtd->writesize - col;
+
+ if (col >= mtd->writesize)
+ m += mtd->oobsize;
+
+ m = min(n, m) & ~3;
+
+ DEBUG(MTD_DEBUG_LEVEL3,
+ "%s:%d: n = %d, m = %d, i = %d, col = %d\n",
+ __func__, __LINE__, n, m, i, col);
+
+ memcpy(p, &buf[i], m);
+ col += m;
+ i += m;
+ n -= m;
+ }
+ }
+ /* Update saved column address */
+ host->col_addr = col;
+}
+
+/* Read the data buffer from the NAND Flash. To read the data from the
+ * NAND Flash, the data output cycle is first initiated by the NFC, which
+ * copies the data to the RAM buffer. This data of length len is then copied to buffer buf.
+ */
+static void mxc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+{
+ struct nand_chip *nand_chip = mtd->priv;
+ struct mxc_nand_host *host = nand_chip->priv;
+ int n, col, i = 0;
+
+ DEBUG(MTD_DEBUG_LEVEL3,
+ "mxc_nand_read_buf(col = %d, len = %d)\n", host->col_addr, len);
+
+ col = host->col_addr;
+
+ /* Adjust saved column address */
+ if (col < mtd->writesize && host->spare_only)
+ col += mtd->writesize;
+
+ n = mtd->writesize + mtd->oobsize - col;
+ n = min(len, n);
+
+ while (n) {
+ void __iomem *p;
+
+ if (col < mtd->writesize)
+ p = host->regs + MAIN_AREA0 + (col & ~3);
+ else
+ p = host->regs + SPARE_AREA0 -
+ mtd->writesize + (col & ~3);
+
+ if (((col | (int)&buf[i]) & 3) || n < 16) {
+ uint32_t data;
+
+ data = readl(p);
+ switch (col & 3) {
+ case 0:
+ if (n) {
+ buf[i++] = (uint8_t) (data);
+ n--;
+ col++;
+ }
+ case 1:
+ if (n) {
+ buf[i++] = (uint8_t) (data >> 8);
+ n--;
+ col++;
+ }
+ case 2:
+ if (n) {
+ buf[i++] = (uint8_t) (data >> 16);
+ n--;
+ col++;
+ }
+ case 3:
+ if (n) {
+ buf[i++] = (uint8_t) (data >> 24);
+ n--;
+ col++;
+ }
+ }
+ } else {
+ int m = mtd->writesize - col;
+
+ if (col >= mtd->writesize)
+ m += mtd->oobsize;
+
+ m = min(n, m) & ~3;
+ memcpy(&buf[i], p, m);
+ col += m;
+ i += m;
+ n -= m;
+ }
+ }
+ /* Update saved column address */
+ host->col_addr = col;
+
+}
+
+/* Used by the upper layer to verify the data in the NAND Flash
+ * against the data in buf. */
+static int mxc_nand_verify_buf(struct mtd_info *mtd,
+ const u_char *buf, int len)
+{
+ return -EFAULT;
+}
+
+/* This function is used by the upper layer to select and
+ * deselect the NAND chip */
+static void mxc_nand_select_chip(struct mtd_info *mtd, int chip)
+{
+ struct nand_chip *nand_chip = mtd->priv;
+ struct mxc_nand_host *host = nand_chip->priv;
+
+#ifdef CONFIG_MTD_NAND_MXC_FORCE_CE
+ if (chip > 0) {
+ DEBUG(MTD_DEBUG_LEVEL0,
+ "ERROR: Illegal chip select (chip = %d)\n", chip);
+ return;
+ }
+
+ if (chip == -1) {
+ writew(readw(host->regs + NFC_CONFIG1) & ~NFC_CE,
+ host->regs + NFC_CONFIG1);
+ return;
+ }
+
+ writew(readw(host->regs + NFC_CONFIG1) | NFC_CE,
+ host->regs + NFC_CONFIG1);
+#endif
+
+ switch (chip) {
+ case -1:
+ /* Disable the NFC clock */
+ if (host->clk_act) {
+ clk_disable(host->clk);
+ host->clk_act = 0;
+ }
+ break;
+ case 0:
+ /* Enable the NFC clock */
+ if (!host->clk_act) {
+ clk_enable(host->clk);
+ host->clk_act = 1;
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+/* Used by the upper layer to write a command to the NAND Flash, for the
+ * different operations to be carried out on it */
+static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
+ int column, int page_addr)
+{
+ struct nand_chip *nand_chip = mtd->priv;
+ struct mxc_nand_host *host = nand_chip->priv;
+ int useirq = true;
+
+ DEBUG(MTD_DEBUG_LEVEL3,
+ "mxc_nand_command (cmd = 0x%x, col = 0x%x, page = 0x%x)\n",
+ command, column, page_addr);
+
+ /* Reset command state information */
+ host->status_request = false;
+
+ /* Command pre-processing step */
+ switch (command) {
+
+ case NAND_CMD_STATUS:
+ host->col_addr = 0;
+ host->status_request = true;
+ break;
+
+ case NAND_CMD_READ0:
+ host->col_addr = column;
+ host->spare_only = false;
+ useirq = false;
+ break;
+
+ case NAND_CMD_READOOB:
+ host->col_addr = column;
+ host->spare_only = true;
+ useirq = false;
+ if (host->pagesize_2k)
+ command = NAND_CMD_READ0; /* only READ0 is valid */
+ break;
+
+ case NAND_CMD_SEQIN:
+ if (column >= mtd->writesize) {
+ /*
+			 * FIXME: before sending the SEQIN command to write the OOB,
+			 * we must read one page out.
+			 * Since the K9F1GXX has no READ1 command to set the current HW
+			 * pointer to the spare area, we must write the whole page
+			 * including the OOB together.
+ */
+ if (host->pagesize_2k)
+ /* call ourself to read a page */
+ mxc_nand_command(mtd, NAND_CMD_READ0, 0,
+ page_addr);
+
+ host->col_addr = column - mtd->writesize;
+ host->spare_only = true;
+
+ /* Set program pointer to spare region */
+ if (!host->pagesize_2k)
+ send_cmd(host, NAND_CMD_READOOB, false);
+ } else {
+ host->spare_only = false;
+ host->col_addr = column;
+
+ /* Set program pointer to page start */
+ if (!host->pagesize_2k)
+ send_cmd(host, NAND_CMD_READ0, false);
+ }
+ useirq = false;
+ break;
+
+ case NAND_CMD_PAGEPROG:
+ send_prog_page(host, 0, host->spare_only);
+
+ if (host->pagesize_2k) {
+			/* the data spans all 4 buffer areas */
+ send_prog_page(host, 1, host->spare_only);
+ send_prog_page(host, 2, host->spare_only);
+ send_prog_page(host, 3, host->spare_only);
+ }
+
+ break;
+
+ case NAND_CMD_ERASE1:
+ useirq = false;
+ break;
+ }
+
+ /* Write out the command to the device. */
+ send_cmd(host, command, useirq);
+
+ /* Write out column address, if necessary */
+ if (column != -1) {
+ /*
+		 * The MXC NANDFC can only perform full page+spare or
+		 * spare-only read/write. When the upper layers perform
+		 * a read/write buf operation, we will use the saved
+		 * column address to index into the full page.
+ */
+ send_addr(host, 0, page_addr == -1);
+ if (host->pagesize_2k)
+ /* another col addr cycle for 2k page */
+ send_addr(host, 0, false);
+ }
+
+ /* Write out page address, if necessary */
+ if (page_addr != -1) {
+ /* paddr_0 - p_addr_7 */
+ send_addr(host, (page_addr & 0xff), false);
+
+ if (host->pagesize_2k) {
+ send_addr(host, (page_addr >> 8) & 0xFF, false);
+ if (mtd->size >= 0x40000000)
+ send_addr(host, (page_addr >> 16) & 0xff, true);
+ } else {
+ /* One more address cycle for higher density devices */
+ if (mtd->size >= 0x4000000) {
+ /* paddr_8 - paddr_15 */
+ send_addr(host, (page_addr >> 8) & 0xff, false);
+ send_addr(host, (page_addr >> 16) & 0xff, true);
+ } else
+ /* paddr_8 - paddr_15 */
+ send_addr(host, (page_addr >> 8) & 0xff, true);
+ }
+ }
+
+ /* Command post-processing step */
+ switch (command) {
+
+ case NAND_CMD_RESET:
+ break;
+
+ case NAND_CMD_READOOB:
+ case NAND_CMD_READ0:
+ if (host->pagesize_2k) {
+ /* send read confirm command */
+ send_cmd(host, NAND_CMD_READSTART, true);
+ /* read for each AREA */
+ send_read_page(host, 0, host->spare_only);
+ send_read_page(host, 1, host->spare_only);
+ send_read_page(host, 2, host->spare_only);
+ send_read_page(host, 3, host->spare_only);
+ } else
+ send_read_page(host, 0, host->spare_only);
+ break;
+
+ case NAND_CMD_READID:
+ send_read_id(host);
+ break;
+
+ case NAND_CMD_PAGEPROG:
+ break;
+
+ case NAND_CMD_STATUS:
+ break;
+
+ case NAND_CMD_ERASE2:
+ break;
+ }
+}
+
+static int __init mxcnd_probe(struct platform_device *pdev)
+{
+ struct nand_chip *this;
+ struct mtd_info *mtd;
+ struct mxc_nand_platform_data *pdata = pdev->dev.platform_data;
+ struct mxc_nand_host *host;
+ struct resource *res;
+ uint16_t tmp;
+ int err = 0, nr_parts = 0;
+
+ /* Allocate memory for MTD device structure and private data */
+ host = kzalloc(sizeof(struct mxc_nand_host), GFP_KERNEL);
+ if (!host)
+ return -ENOMEM;
+
+ host->dev = &pdev->dev;
+ /* structures must be linked */
+ this = &host->nand;
+ mtd = &host->mtd;
+ mtd->priv = this;
+ mtd->owner = THIS_MODULE;
+
+ /* 50 us command delay time */
+ this->chip_delay = 5;
+
+ this->priv = host;
+ this->dev_ready = mxc_nand_dev_ready;
+ this->cmdfunc = mxc_nand_command;
+ this->select_chip = mxc_nand_select_chip;
+ this->read_byte = mxc_nand_read_byte;
+ this->read_word = mxc_nand_read_word;
+ this->write_buf = mxc_nand_write_buf;
+ this->read_buf = mxc_nand_read_buf;
+ this->verify_buf = mxc_nand_verify_buf;
+
+ host->clk = clk_get(&pdev->dev, "nfc_clk");
+ if (IS_ERR(host->clk))
+ goto eclk;
+
+ clk_enable(host->clk);
+ host->clk_act = 1;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ err = -ENODEV;
+ goto eres;
+ }
+
+ host->regs = ioremap(res->start, res->end - res->start + 1);
+ if (!host->regs) {
+ err = -EIO;
+ goto eres;
+ }
+
+ tmp = readw(host->regs + NFC_CONFIG1);
+ tmp |= NFC_INT_MSK;
+ writew(tmp, host->regs + NFC_CONFIG1);
+
+ init_waitqueue_head(&host->irq_waitq);
+
+ host->irq = platform_get_irq(pdev, 0);
+
+ err = request_irq(host->irq, mxc_nfc_irq, 0, "mxc_nd", host);
+ if (err)
+ goto eirq;
+
+ if (pdata->hw_ecc) {
+ this->ecc.calculate = mxc_nand_calculate_ecc;
+ this->ecc.hwctl = mxc_nand_enable_hwecc;
+ this->ecc.correct = mxc_nand_correct_data;
+ this->ecc.mode = NAND_ECC_HW;
+ this->ecc.size = 512;
+ this->ecc.bytes = 3;
+ this->ecc.layout = &nand_hw_eccoob_8;
+ tmp = readw(host->regs + NFC_CONFIG1);
+ tmp |= NFC_ECC_EN;
+ writew(tmp, host->regs + NFC_CONFIG1);
+ } else {
+ this->ecc.size = 512;
+ this->ecc.bytes = 3;
+ this->ecc.layout = &nand_hw_eccoob_8;
+ this->ecc.mode = NAND_ECC_SOFT;
+ tmp = readw(host->regs + NFC_CONFIG1);
+ tmp &= ~NFC_ECC_EN;
+ writew(tmp, host->regs + NFC_CONFIG1);
+ }
+
+ /* Reset NAND */
+ this->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+
+ /* preset operation */
+ /* Unlock the internal RAM Buffer */
+ writew(0x2, host->regs + NFC_CONFIG);
+
+ /* Blocks to be unlocked */
+ writew(0x0, host->regs + NFC_UNLOCKSTART_BLKADDR);
+ writew(0x4000, host->regs + NFC_UNLOCKEND_BLKADDR);
+
+ /* Unlock Block Command for given address range */
+ writew(0x4, host->regs + NFC_WRPROT);
+
+ /* NAND bus width determines access functions used by upper layer */
+ if (pdata->width == 2) {
+ this->options |= NAND_BUSWIDTH_16;
+ this->ecc.layout = &nand_hw_eccoob_16;
+ }
+
+ host->pagesize_2k = 0;
+
+ /* Scan to find existence of the device */
+ if (nand_scan(mtd, 1)) {
+ DEBUG(MTD_DEBUG_LEVEL0,
+ "MXC_ND: Unable to find any NAND device.\n");
+ err = -ENXIO;
+ goto escan;
+ }
+
+ /* Register the partitions */
+#ifdef CONFIG_MTD_PARTITIONS
+ nr_parts =
+ parse_mtd_partitions(mtd, part_probes, &host->parts, 0);
+ if (nr_parts > 0)
+ add_mtd_partitions(mtd, host->parts, nr_parts);
+ else
+#endif
+ {
+ pr_info("Registering %s as whole device\n", mtd->name);
+ add_mtd_device(mtd);
+ }
+
+ platform_set_drvdata(pdev, host);
+
+ return 0;
+
+escan:
+ free_irq(host->irq, host);
+eirq:
+ iounmap(host->regs);
+eres:
+ clk_disable(host->clk);
+ clk_put(host->clk);
+eclk:
+ kfree(host);
+
+ return err;
+}
+
+static int __devexit mxcnd_remove(struct platform_device *pdev)
+{
+ struct mxc_nand_host *host = platform_get_drvdata(pdev);
+
+ platform_set_drvdata(pdev, NULL);
+
+ nand_release(&host->mtd);
+ free_irq(host->irq, host);
+ iounmap(host->regs);
+ clk_put(host->clk);
+ kfree(host);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int mxcnd_suspend(struct platform_device *pdev, pm_message_t state)
+{
+ struct mxc_nand_host *host = platform_get_drvdata(pdev);
+ struct mtd_info *info = &host->mtd;
+ int ret;
+
+ DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND suspend\n");
+ ret = info->suspend(info);
+
+ /* Disable the NFC clock */
+ clk_disable(host->clk);
+
+ return ret;
+}
+
+static int mxcnd_resume(struct platform_device *pdev)
+{
+ struct mxc_nand_host *host = platform_get_drvdata(pdev);
+ struct mtd_info *info = &host->mtd;
+ int ret = 0;
+
+ DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND resume\n");
+ /* Enable the NFC clock */
+ clk_enable(host->clk);
+
+ info->resume(info);
+
+ return ret;
+}
+
+#else
+# define mxcnd_suspend NULL
+# define mxcnd_resume NULL
+#endif /* CONFIG_PM */
+
+static struct platform_driver mxcnd_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ },
+ .remove = __exit_p(mxcnd_remove),
+ .suspend = mxcnd_suspend,
+ .resume = mxcnd_resume,
+};
+
+static int __init mxc_nd_init(void)
+{
+ /* Register the device driver structure. */
+ pr_info("MXC MTD NAND driver\n");
+ if (platform_driver_probe(&mxcnd_driver, mxcnd_probe) != 0) {
+ printk(KERN_ERR "Driver register failed for mxcnd_driver\n");
+ return -ENODEV;
+ }
+ return 0;
+}
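+
+/*
+ * Design note: mxcnd_probe() is __init, so it cannot live in the
+ * platform_driver structure; platform_driver_probe() binds it exactly
+ * once at registration time and then discards it, which also means
+ * the driver does not support late (hotplug) binding.
+ */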
+
+static void __exit mxc_nd_cleanup(void)
+{
+ /* Unregister the device structure */
+ platform_driver_unregister(&mxcnd_driver);
+}
+
+module_init(mxc_nd_init);
+module_exit(mxc_nd_cleanup);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("MXC NAND MTD driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index d1129bae6c2..0a9c9cd33f9 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -801,9 +801,9 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
 * nand_read_subpage - [REPLACEABLE] software ecc based sub-page read function
* @mtd: mtd info structure
* @chip: nand chip info structure
- * @dataofs offset of requested data within the page
- * @readlen data length
- * @buf: buffer to store read data
+ * @data_offs: offset of requested data within the page
+ * @readlen: data length
+ * @bufpoi: buffer to store read data
*/
static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, uint32_t data_offs, uint32_t readlen, uint8_t *bufpoi)
{
@@ -2042,7 +2042,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
return -EINVAL;
}
- instr->fail_addr = 0xffffffff;
+ instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
/* Grab the lock and see if the device is available */
nand_get_device(chip, mtd, FL_ERASING);
@@ -2318,6 +2318,12 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
/* Select the device */
chip->select_chip(mtd, 0);
+ /*
+ * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx)
+ * after power-up
+ */
+ chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+
/* Send the command for reading device ID */
chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
@@ -2488,6 +2494,8 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips)
/* Check for a chip array */
for (i = 1; i < maxchips; i++) {
chip->select_chip(mtd, i);
+ /* See comment in nand_get_flash_type for reset */
+ chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
/* Send the command for reading device ID */
chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
/* Read manufacturer and device IDs */
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index 918a806a847..868147acce2 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -1,13 +1,18 @@
/*
- * This file contains an ECC algorithm from Toshiba that detects and
- * corrects 1 bit errors in a 256 byte block of data.
+ * This file contains an ECC algorithm that detects and corrects 1 bit
+ * errors in a 256 byte block of data.
*
* drivers/mtd/nand/nand_ecc.c
*
- * Copyright (C) 2000-2004 Steven J. Hill (sjhill@realitydiluted.com)
- * Toshiba America Electronics Components, Inc.
+ * Copyright © 2008 Koninklijke Philips Electronics NV.
+ * Author: Frans Meulenbroeks
*
- * Copyright (C) 2006 Thomas Gleixner <tglx@linutronix.de>
+ * Completely replaces the previous ECC implementation which was written by:
+ * Steven J. Hill (sjhill@realitydiluted.com)
+ * Thomas Gleixner (tglx@linutronix.de)
+ *
+ * Information on how this algorithm works and how it was developed
+ * can be found in Documentation/mtd/nand_ecc.txt
*
* This file is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -23,174 +28,475 @@
* with this file; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*
- * As a special exception, if other files instantiate templates or use
- * macros or inline functions from these files, or you compile these
- * files and link them with other works to produce a work based on these
- * files, these files do not by themselves cause the resulting work to be
- * covered by the GNU General Public License. However the source code for
- * these files must still be made available in accordance with section (3)
- * of the GNU General Public License.
- *
- * This exception does not invalidate any other reasons why a work based on
- * this file might be covered by the GNU General Public License.
*/
+/*
+ * The STANDALONE macro is useful when running the code outside the kernel,
+ * e.g. in a testbed or a benchmark program.
+ * When STANDALONE is defined, the module-related macros and the Linux
+ * include files are left out.
+ * Instead a private declaration of mtd_info is given to satisfy the
+ * compiler (the code does not use mtd_info, so the code does not care).
+ */
+#ifndef STANDALONE
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
#include <linux/mtd/nand_ecc.h>
+#include <asm/byteorder.h>
+#else
+#include <stdint.h>
+struct mtd_info;
+#define EXPORT_SYMBOL(x) /* x */
+
+#define MODULE_LICENSE(x) /* x */
+#define MODULE_AUTHOR(x) /* x */
+#define MODULE_DESCRIPTION(x) /* x */
+
+#define printk printf
+#define KERN_ERR ""
+#endif
+
+/*
+ * invparity is a 256 byte table that contains the odd parity
+ * for each byte. So if the number of bits in a byte is even,
+ * the array element is 1, and when the number of bits is odd
+ * the array element is 0.
+ */
+static const char invparity[256] = {
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
+};
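+
+/*
+ * Sketch only: the table above is equivalent to computing odd parity at
+ * runtime with the kernel's hweight8() popcount helper, i.e.
+ *
+ *	invparity[i] == !(hweight8(i) & 1)	for i = 0..255
+ */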
+
+/*
+ * bitsperbyte contains the number of set bits per byte.
+ * This is only used for testing and repairing parity
+ * (a precalculated value slightly improves performance).
+ */
+static const char bitsperbyte[256] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
+};
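+
+/*
+ * Sketch only: bitsperbyte[i] equals hweight8(i); the table trades
+ * 256 bytes of .rodata for avoiding a popcount in the error path, e.g.
+ *
+ *	nr_bits = bitsperbyte[b0];	(same as hweight8(b0))
+ */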
/*
- * Pre-calculated 256-way 1 byte column parity
+ * addressbits is a lookup table to filter out the bits from the xor-ed
+ * ECC data that identify the faulty location.
+ * This is only used for repairing parity;
+ * see the comments in nand_correct_data for more details.
*/
-static const u_char nand_ecc_precalc_table[] = {
- 0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00,
- 0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65,
- 0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66,
- 0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03,
- 0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69,
- 0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c,
- 0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f,
- 0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a,
- 0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a,
- 0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f,
- 0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c,
- 0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69,
- 0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03,
- 0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66,
- 0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65,
- 0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00
+static const char addressbits[256] = {
+ 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+ 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+ 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+ 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+ 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+ 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+ 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+ 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+ 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+ 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+ 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+ 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+ 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+ 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+ 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+ 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+ 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+ 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+ 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+ 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+ 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+ 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+ 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+ 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+ 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+ 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+ 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+ 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+ 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+ 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+ 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+ 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
};
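+
+/*
+ * Sketch only: addressbits[x] gathers the odd-numbered bits of x into a
+ * nibble, i.e. for every x in 0..255:
+ *
+ *	addressbits[x] == ((x >> 1) & 1) | ((x >> 2) & 2) |
+ *			  ((x >> 3) & 4) | ((x >> 4) & 8)
+ */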
/**
- * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block
+ * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
+ * block
* @mtd: MTD block structure
- * @dat: raw data
- * @ecc_code: buffer for ECC
+ * @buf: input buffer with raw data
+ * @code: output buffer with ECC
*/
-int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
- u_char *ecc_code)
+int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+ unsigned char *code)
{
- uint8_t idx, reg1, reg2, reg3, tmp1, tmp2;
int i;
+ const uint32_t *bp = (uint32_t *)buf;
+ /* 256 or 512 bytes/ecc */
+ const uint32_t eccsize_mult =
+ (((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
+ uint32_t cur; /* current value in buffer */
+ /* rp0..rp15..rp17 are the various accumulated parities (per byte) */
+ uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+ uint32_t rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15, rp16;
+ uint32_t uninitialized_var(rp17); /* to make compiler happy */
+ uint32_t par; /* the cumulative parity for all data */
+ uint32_t tmppar; /* the cumulative parity for this iteration;
+ for rp12, rp14 and rp16 at the end of the
+ loop */
+
+ par = 0;
+ rp4 = 0;
+ rp6 = 0;
+ rp8 = 0;
+ rp10 = 0;
+ rp12 = 0;
+ rp14 = 0;
+ rp16 = 0;
+
+ /*
+ * The loop is unrolled a number of times;
+ * this avoids if statements deciding which rp value to update.
+ * Also we process the data by longwords.
+ * Note: passing unaligned data might give a performance penalty.
+ * It is assumed that the buffers are aligned.
+ * tmppar is the cumulative sum of this iteration;
+ * it is needed for calculating rp12, rp14, rp16 and par,
+ * and is also used as a performance improvement for rp6, rp8 and rp10.
+ */
+ for (i = 0; i < eccsize_mult << 2; i++) {
+ cur = *bp++;
+ tmppar = cur;
+ rp4 ^= cur;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp6 ^= tmppar;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp4 ^= cur;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp8 ^= tmppar;
- /* Initialize variables */
- reg1 = reg2 = reg3 = 0;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp4 ^= cur;
+ rp6 ^= cur;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp6 ^= cur;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp4 ^= cur;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp10 ^= tmppar;
- /* Build up column parity */
- for(i = 0; i < 256; i++) {
- /* Get CP0 - CP5 from table */
- idx = nand_ecc_precalc_table[*dat++];
- reg1 ^= (idx & 0x3f);
+ cur = *bp++;
+ tmppar ^= cur;
+ rp4 ^= cur;
+ rp6 ^= cur;
+ rp8 ^= cur;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp6 ^= cur;
+ rp8 ^= cur;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp4 ^= cur;
+ rp8 ^= cur;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp8 ^= cur;
- /* All bit XOR = 1 ? */
- if (idx & 0x40) {
- reg3 ^= (uint8_t) i;
- reg2 ^= ~((uint8_t) i);
- }
+ cur = *bp++;
+ tmppar ^= cur;
+ rp4 ^= cur;
+ rp6 ^= cur;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp6 ^= cur;
+ cur = *bp++;
+ tmppar ^= cur;
+ rp4 ^= cur;
+ cur = *bp++;
+ tmppar ^= cur;
+
+ par ^= tmppar;
+ if ((i & 0x1) == 0)
+ rp12 ^= tmppar;
+ if ((i & 0x2) == 0)
+ rp14 ^= tmppar;
+ if (eccsize_mult == 2 && (i & 0x4) == 0)
+ rp16 ^= tmppar;
}
- /* Create non-inverted ECC code from line parity */
- tmp1 = (reg3 & 0x80) >> 0; /* B7 -> B7 */
- tmp1 |= (reg2 & 0x80) >> 1; /* B7 -> B6 */
- tmp1 |= (reg3 & 0x40) >> 1; /* B6 -> B5 */
- tmp1 |= (reg2 & 0x40) >> 2; /* B6 -> B4 */
- tmp1 |= (reg3 & 0x20) >> 2; /* B5 -> B3 */
- tmp1 |= (reg2 & 0x20) >> 3; /* B5 -> B2 */
- tmp1 |= (reg3 & 0x10) >> 3; /* B4 -> B1 */
- tmp1 |= (reg2 & 0x10) >> 4; /* B4 -> B0 */
-
- tmp2 = (reg3 & 0x08) << 4; /* B3 -> B7 */
- tmp2 |= (reg2 & 0x08) << 3; /* B3 -> B6 */
- tmp2 |= (reg3 & 0x04) << 3; /* B2 -> B5 */
- tmp2 |= (reg2 & 0x04) << 2; /* B2 -> B4 */
- tmp2 |= (reg3 & 0x02) << 2; /* B1 -> B3 */
- tmp2 |= (reg2 & 0x02) << 1; /* B1 -> B2 */
- tmp2 |= (reg3 & 0x01) << 1; /* B0 -> B1 */
- tmp2 |= (reg2 & 0x01) << 0; /* B7 -> B0 */
-
- /* Calculate final ECC code */
-#ifdef CONFIG_MTD_NAND_ECC_SMC
- ecc_code[0] = ~tmp2;
- ecc_code[1] = ~tmp1;
+ /*
+ * Handle the fact that we use longword operations:
+ * we'll bring rp4..rp14..rp16 back to single byte entities by
+ * shifting and xoring; first fold the upper and lower 16 bits,
+ * then the upper and lower 8 bits.
+ */
+ rp4 ^= (rp4 >> 16);
+ rp4 ^= (rp4 >> 8);
+ rp4 &= 0xff;
+ rp6 ^= (rp6 >> 16);
+ rp6 ^= (rp6 >> 8);
+ rp6 &= 0xff;
+ rp8 ^= (rp8 >> 16);
+ rp8 ^= (rp8 >> 8);
+ rp8 &= 0xff;
+ rp10 ^= (rp10 >> 16);
+ rp10 ^= (rp10 >> 8);
+ rp10 &= 0xff;
+ rp12 ^= (rp12 >> 16);
+ rp12 ^= (rp12 >> 8);
+ rp12 &= 0xff;
+ rp14 ^= (rp14 >> 16);
+ rp14 ^= (rp14 >> 8);
+ rp14 &= 0xff;
+ if (eccsize_mult == 2) {
+ rp16 ^= (rp16 >> 16);
+ rp16 ^= (rp16 >> 8);
+ rp16 &= 0xff;
+ }
+
+ /*
+ * we also need to calculate the row parity for rp0..rp3
+ * This is present in par, because par is now
+ * rp3 rp3 rp2 rp2 in little endian and
+ * rp2 rp2 rp3 rp3 in big endian
+ * as well as
+ * rp1 rp0 rp1 rp0 in little endian and
+ * rp0 rp1 rp0 rp1 in big endian
+ * First calculate rp2 and rp3
+ */
+#ifdef __BIG_ENDIAN
+ rp2 = (par >> 16);
+ rp2 ^= (rp2 >> 8);
+ rp2 &= 0xff;
+ rp3 = par & 0xffff;
+ rp3 ^= (rp3 >> 8);
+ rp3 &= 0xff;
#else
- ecc_code[0] = ~tmp1;
- ecc_code[1] = ~tmp2;
+ rp3 = (par >> 16);
+ rp3 ^= (rp3 >> 8);
+ rp3 &= 0xff;
+ rp2 = par & 0xffff;
+ rp2 ^= (rp2 >> 8);
+ rp2 &= 0xff;
#endif
- ecc_code[2] = ((~reg1) << 2) | 0x03;
- return 0;
-}
-EXPORT_SYMBOL(nand_calculate_ecc);
+ /* reduce par to 16 bits then calculate rp1 and rp0 */
+ par ^= (par >> 16);
+#ifdef __BIG_ENDIAN
+ rp0 = (par >> 8) & 0xff;
+ rp1 = (par & 0xff);
+#else
+ rp1 = (par >> 8) & 0xff;
+ rp0 = (par & 0xff);
+#endif
-static inline int countbits(uint32_t byte)
-{
- int res = 0;
+ /* finally reduce par to 8 bits */
+ par ^= (par >> 8);
+ par &= 0xff;
- for (;byte; byte >>= 1)
- res += byte & 0x01;
- return res;
+ /*
+ * and calculate rp5..rp15..rp17
+ * note that par = rp4 ^ rp5 and due to the commutative property
+ * of the ^ operator we can say:
+ * rp5 = (par ^ rp4);
+ * The & 0xff seems superfluous, but benchmarking showed that
+ * leaving it out gives slightly worse results. No idea why; probably
+ * it has to do with the way the Pentium pipeline is organized.
+ */
+ rp5 = (par ^ rp4) & 0xff;
+ rp7 = (par ^ rp6) & 0xff;
+ rp9 = (par ^ rp8) & 0xff;
+ rp11 = (par ^ rp10) & 0xff;
+ rp13 = (par ^ rp12) & 0xff;
+ rp15 = (par ^ rp14) & 0xff;
+ if (eccsize_mult == 2)
+ rp17 = (par ^ rp16) & 0xff;
+
+ /*
+ * Finally calculate the ecc bits.
+ * Again here it might seem that there are performance optimisations
+ * possible, but benchmarks showed that on the system this was
+ * developed on, the code below is the fastest.
+ */
+#ifdef CONFIG_MTD_NAND_ECC_SMC
+ code[0] =
+ (invparity[rp7] << 7) |
+ (invparity[rp6] << 6) |
+ (invparity[rp5] << 5) |
+ (invparity[rp4] << 4) |
+ (invparity[rp3] << 3) |
+ (invparity[rp2] << 2) |
+ (invparity[rp1] << 1) |
+ (invparity[rp0]);
+ code[1] =
+ (invparity[rp15] << 7) |
+ (invparity[rp14] << 6) |
+ (invparity[rp13] << 5) |
+ (invparity[rp12] << 4) |
+ (invparity[rp11] << 3) |
+ (invparity[rp10] << 2) |
+ (invparity[rp9] << 1) |
+ (invparity[rp8]);
+#else
+ code[1] =
+ (invparity[rp7] << 7) |
+ (invparity[rp6] << 6) |
+ (invparity[rp5] << 5) |
+ (invparity[rp4] << 4) |
+ (invparity[rp3] << 3) |
+ (invparity[rp2] << 2) |
+ (invparity[rp1] << 1) |
+ (invparity[rp0]);
+ code[0] =
+ (invparity[rp15] << 7) |
+ (invparity[rp14] << 6) |
+ (invparity[rp13] << 5) |
+ (invparity[rp12] << 4) |
+ (invparity[rp11] << 3) |
+ (invparity[rp10] << 2) |
+ (invparity[rp9] << 1) |
+ (invparity[rp8]);
+#endif
+ if (eccsize_mult == 1)
+ code[2] =
+ (invparity[par & 0xf0] << 7) |
+ (invparity[par & 0x0f] << 6) |
+ (invparity[par & 0xcc] << 5) |
+ (invparity[par & 0x33] << 4) |
+ (invparity[par & 0xaa] << 3) |
+ (invparity[par & 0x55] << 2) |
+ 3;
+ else
+ code[2] =
+ (invparity[par & 0xf0] << 7) |
+ (invparity[par & 0x0f] << 6) |
+ (invparity[par & 0xcc] << 5) |
+ (invparity[par & 0x33] << 4) |
+ (invparity[par & 0xaa] << 3) |
+ (invparity[par & 0x55] << 2) |
+ (invparity[rp17] << 1) |
+ (invparity[rp16] << 0);
+ return 0;
}
+EXPORT_SYMBOL(nand_calculate_ecc);
/**
* nand_correct_data - [NAND Interface] Detect and correct bit error(s)
* @mtd: MTD block structure
- * @dat: raw data read from the chip
+ * @buf: raw data read from the chip
* @read_ecc: ECC from the chip
* @calc_ecc: the ECC calculated from raw data
*
- * Detect and correct a 1 bit error for 256 byte block
+ * Detect and correct a 1 bit error for 256/512 byte block
*/
-int nand_correct_data(struct mtd_info *mtd, u_char *dat,
- u_char *read_ecc, u_char *calc_ecc)
+int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
+ unsigned char *read_ecc, unsigned char *calc_ecc)
{
- uint8_t s0, s1, s2;
+ unsigned char b0, b1, b2;
+ unsigned char byte_addr, bit_addr;
+ /* 256 or 512 bytes/ecc */
+ const uint32_t eccsize_mult =
+ (((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
+ /*
+ * b0 to b2 indicate which bit is faulty (if any);
+ * we might need the xor result more than once,
+ * so keep it in a local var.
+ */
#ifdef CONFIG_MTD_NAND_ECC_SMC
- s0 = calc_ecc[0] ^ read_ecc[0];
- s1 = calc_ecc[1] ^ read_ecc[1];
- s2 = calc_ecc[2] ^ read_ecc[2];
+ b0 = read_ecc[0] ^ calc_ecc[0];
+ b1 = read_ecc[1] ^ calc_ecc[1];
#else
- s1 = calc_ecc[0] ^ read_ecc[0];
- s0 = calc_ecc[1] ^ read_ecc[1];
- s2 = calc_ecc[2] ^ read_ecc[2];
+ b0 = read_ecc[1] ^ calc_ecc[1];
+ b1 = read_ecc[0] ^ calc_ecc[0];
#endif
- if ((s0 | s1 | s2) == 0)
- return 0;
-
- /* Check for a single bit error */
- if( ((s0 ^ (s0 >> 1)) & 0x55) == 0x55 &&
- ((s1 ^ (s1 >> 1)) & 0x55) == 0x55 &&
- ((s2 ^ (s2 >> 1)) & 0x54) == 0x54) {
+ b2 = read_ecc[2] ^ calc_ecc[2];
- uint32_t byteoffs, bitnum;
+ /* check if there are any bitfaults */
- byteoffs = (s1 << 0) & 0x80;
- byteoffs |= (s1 << 1) & 0x40;
- byteoffs |= (s1 << 2) & 0x20;
- byteoffs |= (s1 << 3) & 0x10;
+ /* repeated if statements are slightly more efficient than switch ... */
+ /* ordered by likelihood */
- byteoffs |= (s0 >> 4) & 0x08;
- byteoffs |= (s0 >> 3) & 0x04;
- byteoffs |= (s0 >> 2) & 0x02;
- byteoffs |= (s0 >> 1) & 0x01;
-
- bitnum = (s2 >> 5) & 0x04;
- bitnum |= (s2 >> 4) & 0x02;
- bitnum |= (s2 >> 3) & 0x01;
-
- dat[byteoffs] ^= (1 << bitnum);
+ if ((b0 | b1 | b2) == 0)
+ return 0; /* no error */
+ if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
+ (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
+ ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
+ (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
+ /* single bit error */
+ /*
+ * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
+ * byte, cp 5/3/1 indicate the faulty bit.
+ * A lookup table (called addressbits) is used to filter
+ * the bits from the byte they are in.
+ * A marginal optimisation is possible by having three
+ * different lookup tables.
+ * One as we have now (for b0), one for b2
+ * (that would avoid the >> 1), and one for b1 (with all values
+ * << 4). However it was felt that introducing two more tables
+ * hardly justifies the gain.
+ *
+ * The b2 shift is there to get rid of the lowest two bits.
+ * We could also do addressbits[b2] >> 1 but for the
+ * performance it does not make any difference.
+ */
+ if (eccsize_mult == 1)
+ byte_addr = (addressbits[b1] << 4) + addressbits[b0];
+ else
+ byte_addr = (addressbits[b2 & 0x3] << 8) +
+ (addressbits[b1] << 4) + addressbits[b0];
+ bit_addr = addressbits[b2 >> 2];
+ /* flip the bit */
+ buf[byte_addr] ^= (1 << bit_addr);
return 1;
- }
- if(countbits(s0 | ((uint32_t)s1 << 8) | ((uint32_t)s2 <<16)) == 1)
- return 1;
+ }
+ /* count nr of bits; use table lookup, faster than calculating it */
+ if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
+ return 1; /* error in ecc data; no action needed */
- return -EBADMSG;
+ printk(KERN_ERR "uncorrectable error : ");
+ return -1;
}
EXPORT_SYMBOL(nand_correct_data);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Steven J. Hill <sjhill@realitydiluted.com>");
+MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
MODULE_DESCRIPTION("Generic NAND ECC support");
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 556e8131ecd..ae7c57781a6 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -38,7 +38,6 @@
#include <linux/delay.h>
#include <linux/list.h>
#include <linux/random.h>
-#include <asm/div64.h>
/* Default simulator parameters values */
#if !defined(CONFIG_NANDSIM_FIRST_ID_BYTE) || \
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index a64ad15b8fd..c0fa9c9edf0 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -115,55 +115,11 @@ enum {
STATE_PIO_WRITING,
};
-struct pxa3xx_nand_timing {
- unsigned int tCH; /* Enable signal hold time */
- unsigned int tCS; /* Enable signal setup time */
- unsigned int tWH; /* ND_nWE high duration */
- unsigned int tWP; /* ND_nWE pulse time */
- unsigned int tRH; /* ND_nRE high duration */
- unsigned int tRP; /* ND_nRE pulse width */
- unsigned int tR; /* ND_nWE high to ND_nRE low for read */
- unsigned int tWHR; /* ND_nWE high to ND_nRE low for status read */
- unsigned int tAR; /* ND_ALE low to ND_nRE low delay */
-};
-
-struct pxa3xx_nand_cmdset {
- uint16_t read1;
- uint16_t read2;
- uint16_t program;
- uint16_t read_status;
- uint16_t read_id;
- uint16_t erase;
- uint16_t reset;
- uint16_t lock;
- uint16_t unlock;
- uint16_t lock_status;
-};
-
-struct pxa3xx_nand_flash {
- struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
- struct pxa3xx_nand_cmdset *cmdset;
-
- uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
- uint32_t page_size; /* Page size in bytes (PAGE_SZ) */
- uint32_t flash_width; /* Width of Flash memory (DWIDTH_M) */
- uint32_t dfc_width; /* Width of flash controller(DWIDTH_C) */
- uint32_t num_blocks; /* Number of physical blocks in Flash */
- uint32_t chip_id;
-
- /* NOTE: these are automatically calculated, do not define */
- size_t oob_size;
- size_t read_id_bytes;
-
- unsigned int col_addr_cycles;
- unsigned int row_addr_cycles;
-};
-
struct pxa3xx_nand_info {
struct nand_chip nand_chip;
struct platform_device *pdev;
- struct pxa3xx_nand_flash *flash_info;
+ const struct pxa3xx_nand_flash *flash_info;
struct clk *clk;
void __iomem *mmio_base;
@@ -202,12 +158,20 @@ struct pxa3xx_nand_info {
uint32_t ndcb0;
uint32_t ndcb1;
uint32_t ndcb2;
+
+ /* calculated from pxa3xx_nand_flash data */
+ size_t oob_size;
+ size_t read_id_bytes;
+
+ unsigned int col_addr_cycles;
+ unsigned int row_addr_cycles;
};
static int use_dma = 1;
module_param(use_dma, bool, 0444);
MODULE_PARM_DESC(use_dma, "enable DMA for data transfers to/from NAND HW");
+#ifdef CONFIG_MTD_NAND_PXA3xx_BUILTIN
static struct pxa3xx_nand_cmdset smallpage_cmdset = {
.read1 = 0x0000,
.read2 = 0x0050,
@@ -291,11 +255,35 @@ static struct pxa3xx_nand_flash micron1GbX16 = {
.chip_id = 0xb12c,
};
+static struct pxa3xx_nand_timing stm2GbX16_timing = {
+ .tCH = 10,
+ .tCS = 35,
+ .tWH = 15,
+ .tWP = 25,
+ .tRH = 15,
+ .tRP = 25,
+ .tR = 25000,
+ .tWHR = 60,
+ .tAR = 10,
+};
+
+static struct pxa3xx_nand_flash stm2GbX16 = {
+ .timing = &stm2GbX16_timing,
+ .page_per_block = 64,
+ .page_size = 2048,
+ .flash_width = 16,
+ .dfc_width = 16,
+ .num_blocks = 2048,
+ .chip_id = 0xba20,
+};
+
static struct pxa3xx_nand_flash *builtin_flash_types[] = {
&samsung512MbX16,
&micron1GbX8,
&micron1GbX16,
+ &stm2GbX16,
};
+#endif /* CONFIG_MTD_NAND_PXA3xx_BUILTIN */
#define NDTR0_tCH(c) (min((c), 7) << 19)
#define NDTR0_tCS(c) (min((c), 7) << 16)
@@ -312,7 +300,7 @@ static struct pxa3xx_nand_flash *builtin_flash_types[] = {
#define ns2cycle(ns, clk) (int)(((ns) * (clk / 1000000) / 1000) + 1)
static void pxa3xx_nand_set_timing(struct pxa3xx_nand_info *info,
- struct pxa3xx_nand_timing *t)
+ const struct pxa3xx_nand_timing *t)
{
unsigned long nand_clk = clk_get_rate(info->clk);
uint32_t ndtr0, ndtr1;
@@ -354,8 +342,8 @@ static int wait_for_event(struct pxa3xx_nand_info *info, uint32_t event)
static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
uint16_t cmd, int column, int page_addr)
{
- struct pxa3xx_nand_flash *f = info->flash_info;
- struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
+ const struct pxa3xx_nand_flash *f = info->flash_info;
+ const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
/* calculate data size */
switch (f->page_size) {
@@ -373,14 +361,14 @@ static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
info->ndcb1 = 0;
info->ndcb2 = 0;
- info->ndcb0 |= NDCB0_ADDR_CYC(f->row_addr_cycles + f->col_addr_cycles);
+ info->ndcb0 |= NDCB0_ADDR_CYC(info->row_addr_cycles + info->col_addr_cycles);
- if (f->col_addr_cycles == 2) {
+ if (info->col_addr_cycles == 2) {
/* large block, 2 cycles for column address
* row address starts from 3rd cycle
*/
info->ndcb1 |= (page_addr << 16) | (column & 0xffff);
- if (f->row_addr_cycles == 3)
+ if (info->row_addr_cycles == 3)
info->ndcb2 = (page_addr >> 16) & 0xff;
} else
/* small block, 1 cycle for column address
@@ -406,7 +394,7 @@ static int prepare_erase_cmd(struct pxa3xx_nand_info *info,
static int prepare_other_cmd(struct pxa3xx_nand_info *info, uint16_t cmd)
{
- struct pxa3xx_nand_cmdset *cmdset = info->flash_info->cmdset;
+ const struct pxa3xx_nand_cmdset *cmdset = info->flash_info->cmdset;
info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
info->ndcb1 = 0;
@@ -641,8 +629,8 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
int column, int page_addr)
{
struct pxa3xx_nand_info *info = mtd->priv;
- struct pxa3xx_nand_flash *flash_info = info->flash_info;
- struct pxa3xx_nand_cmdset *cmdset = flash_info->cmdset;
+ const struct pxa3xx_nand_flash *flash_info = info->flash_info;
+ const struct pxa3xx_nand_cmdset *cmdset = flash_info->cmdset;
int ret;
info->use_dma = (use_dma) ? 1 : 0;
@@ -720,7 +708,7 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
info->use_dma = 0; /* force PIO read */
info->buf_start = 0;
info->buf_count = (command == NAND_CMD_READID) ?
- flash_info->read_id_bytes : 1;
+ info->read_id_bytes : 1;
if (prepare_other_cmd(info, (command == NAND_CMD_READID) ?
cmdset->read_id : cmdset->read_status))
@@ -861,8 +849,8 @@ static int pxa3xx_nand_ecc_correct(struct mtd_info *mtd,
static int __readid(struct pxa3xx_nand_info *info, uint32_t *id)
{
- struct pxa3xx_nand_flash *f = info->flash_info;
- struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
+ const struct pxa3xx_nand_flash *f = info->flash_info;
+ const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
uint32_t ndcr;
uint8_t id_buff[8];
@@ -891,7 +879,7 @@ fail_timeout:
}
static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
- struct pxa3xx_nand_flash *f)
+ const struct pxa3xx_nand_flash *f)
{
struct platform_device *pdev = info->pdev;
struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data;
@@ -904,25 +892,25 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
return -EINVAL;
/* calculate flash information */
- f->oob_size = (f->page_size == 2048) ? 64 : 16;
- f->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
+ info->oob_size = (f->page_size == 2048) ? 64 : 16;
+ info->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
/* calculate addressing information */
- f->col_addr_cycles = (f->page_size == 2048) ? 2 : 1;
+ info->col_addr_cycles = (f->page_size == 2048) ? 2 : 1;
if (f->num_blocks * f->page_per_block > 65536)
- f->row_addr_cycles = 3;
+ info->row_addr_cycles = 3;
else
- f->row_addr_cycles = 2;
+ info->row_addr_cycles = 2;
ndcr |= (pdata->enable_arbiter) ? NDCR_ND_ARB_EN : 0;
- ndcr |= (f->col_addr_cycles == 2) ? NDCR_RA_START : 0;
+ ndcr |= (info->col_addr_cycles == 2) ? NDCR_RA_START : 0;
ndcr |= (f->page_per_block == 64) ? NDCR_PG_PER_BLK : 0;
ndcr |= (f->page_size == 2048) ? NDCR_PAGE_SZ : 0;
ndcr |= (f->flash_width == 16) ? NDCR_DWIDTH_M : 0;
ndcr |= (f->dfc_width == 16) ? NDCR_DWIDTH_C : 0;
- ndcr |= NDCR_RD_ID_CNT(f->read_id_bytes);
+ ndcr |= NDCR_RD_ID_CNT(info->read_id_bytes);
ndcr |= NDCR_SPARE_EN; /* enable spare by default */
info->reg_ndcr = ndcr;
@@ -932,12 +920,27 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
return 0;
}
-static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info)
+static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
+ const struct pxa3xx_nand_platform_data *pdata)
{
- struct pxa3xx_nand_flash *f;
- uint32_t id;
+ const struct pxa3xx_nand_flash *f;
+ uint32_t id = -1;
int i;
+ for (i = 0; i < pdata->num_flash; ++i) {
+ f = pdata->flash + i;
+
+ if (pxa3xx_nand_config_flash(info, f))
+ continue;
+
+ if (__readid(info, &id))
+ continue;
+
+ if (id == f->chip_id)
+ return 0;
+ }
+
+#ifdef CONFIG_MTD_NAND_PXA3xx_BUILTIN
for (i = 0; i < ARRAY_SIZE(builtin_flash_types); i++) {
f = builtin_flash_types[i];
@@ -951,7 +954,11 @@ static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info)
if (id == f->chip_id)
return 0;
}
+#endif
+ dev_warn(&info->pdev->dev,
+ "failed to detect configured nand flash; found %04x instead of\n",
+ id);
return -ENODEV;
}
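+
+/*
+ * Illustrative board-side sketch (hypothetical names): with this change a
+ * machine file can pass its flash table through platform data instead of
+ * relying on the builtin list:
+ *
+ *	static struct pxa3xx_nand_platform_data board_nand_data = {
+ *		.enable_arbiter	= 1,
+ *		.flash		= board_flash_table,
+ *		.num_flash	= ARRAY_SIZE(board_flash_table),
+ *	};
+ */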
@@ -1014,7 +1021,7 @@ static struct nand_ecclayout hw_largepage_ecclayout = {
static void pxa3xx_nand_init_mtd(struct mtd_info *mtd,
struct pxa3xx_nand_info *info)
{
- struct pxa3xx_nand_flash *f = info->flash_info;
+ const struct pxa3xx_nand_flash *f = info->flash_info;
struct nand_chip *this = &info->nand_chip;
this->options = (f->flash_width == 16) ? NAND_BUSWIDTH_16: 0;
@@ -1135,7 +1142,7 @@ static int pxa3xx_nand_probe(struct platform_device *pdev)
goto fail_free_buf;
}
- ret = pxa3xx_nand_detect_flash(info);
+ ret = pxa3xx_nand_detect_flash(info, pdata);
if (ret) {
dev_err(&pdev->dev, "failed to detect flash\n");
ret = -ENODEV;
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
new file mode 100644
index 00000000000..821acb08ff1
--- /dev/null
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -0,0 +1,878 @@
+/*
+ * SuperH FLCTL nand controller
+ *
+ * Copyright © 2008 Renesas Solutions Corp.
+ * Copyright © 2008 Atom Create Engineering Co., Ltd.
+ *
+ * Based on fsl_elbc_nand.c, Copyright © 2006-2007 Freescale Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/sh_flctl.h>
+
+static struct nand_ecclayout flctl_4secc_oob_16 = {
+ .eccbytes = 10,
+ .eccpos = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+ .oobfree = {
+ {.offset = 12,
+ .length = 4} },
+};
+
+static struct nand_ecclayout flctl_4secc_oob_64 = {
+ .eccbytes = 10,
+ .eccpos = {48, 49, 50, 51, 52, 53, 54, 55, 56, 57},
+ .oobfree = {
+ {.offset = 60,
+ .length = 4} },
+};
+
+static uint8_t scan_ff_pattern[] = { 0xff, 0xff };
+
+static struct nand_bbt_descr flctl_4secc_smallpage = {
+ .options = NAND_BBT_SCAN2NDPAGE,
+ .offs = 11,
+ .len = 1,
+ .pattern = scan_ff_pattern,
+};
+
+static struct nand_bbt_descr flctl_4secc_largepage = {
+ .options = 0,
+ .offs = 58,
+ .len = 2,
+ .pattern = scan_ff_pattern,
+};
+
+static void empty_fifo(struct sh_flctl *flctl)
+{
+ writel(0x000c0000, FLINTDMACR(flctl)); /* FIFO Clear */
+ writel(0x00000000, FLINTDMACR(flctl)); /* Clear Error flags */
+}
+
+static void start_translation(struct sh_flctl *flctl)
+{
+ writeb(TRSTRT, FLTRCR(flctl));
+}
+
+static void wait_completion(struct sh_flctl *flctl)
+{
+ uint32_t timeout = LOOP_TIMEOUT_MAX;
+
+ while (timeout--) {
+ if (readb(FLTRCR(flctl)) & TREND) {
+ writeb(0x0, FLTRCR(flctl));
+ return;
+ }
+ udelay(1);
+ }
+
+ printk(KERN_ERR "wait_completion(): Timeout occured \n");
+ writeb(0x0, FLTRCR(flctl));
+}
+
+static void set_addr(struct mtd_info *mtd, int column, int page_addr)
+{
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+ uint32_t addr = 0;
+
+ if (column == -1) {
+ addr = page_addr; /* ERASE1 */
+ } else if (page_addr != -1) {
+ /* SEQIN, READ0, etc.. */
+ if (flctl->page_size) {
+ addr = column & 0x0FFF;
+ addr |= (page_addr & 0xff) << 16;
+ addr |= ((page_addr >> 8) & 0xff) << 24;
+ /* bigger than 128MB */
+ if (flctl->rw_ADRCNT == ADRCNT2_E) {
+ uint32_t addr2;
+ addr2 = (page_addr >> 16) & 0xff;
+ writel(addr2, FLADR2(flctl));
+ }
+ } else {
+ addr = column;
+ addr |= (page_addr & 0xff) << 8;
+ addr |= ((page_addr >> 8) & 0xff) << 16;
+ addr |= ((page_addr >> 16) & 0xff) << 24;
+ }
+ }
+ writel(addr, FLADR(flctl));
+}
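+
+/*
+ * Sketch of the FLADR value built above for 2k-page devices:
+ *
+ *	bits  0..11  column offset within the page
+ *	bits 16..23  page address, low byte
+ *	bits 24..31  page address, middle byte
+ *
+ * The high page byte goes to FLADR2 on chips larger than 128MB
+ * (rw_ADRCNT == ADRCNT2_E).
+ */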
+
+static void wait_rfifo_ready(struct sh_flctl *flctl)
+{
+ uint32_t timeout = LOOP_TIMEOUT_MAX;
+
+ while (timeout--) {
+ uint32_t val;
+ /* check FIFO */
+ val = readl(FLDTCNTR(flctl)) >> 16;
+ if (val & 0xFF)
+ return;
+ udelay(1);
+ }
+ printk(KERN_ERR "wait_rfifo_ready(): Timeout occured \n");
+}
+
+static void wait_wfifo_ready(struct sh_flctl *flctl)
+{
+ uint32_t len, timeout = LOOP_TIMEOUT_MAX;
+
+ while (timeout--) {
+ /* check FIFO */
+ len = (readl(FLDTCNTR(flctl)) >> 16) & 0xFF;
+ if (len >= 4)
+ return;
+ udelay(1);
+ }
+ printk(KERN_ERR "wait_wfifo_ready(): Timeout occured \n");
+}
+
+static int wait_recfifo_ready(struct sh_flctl *flctl)
+{
+ uint32_t timeout = LOOP_TIMEOUT_MAX;
+ int checked[4];
+ void __iomem *ecc_reg[4];
+ int i;
+ uint32_t data, size;
+
+ memset(checked, 0, sizeof(checked));
+
+ while (timeout--) {
+ size = readl(FLDTCNTR(flctl)) >> 24;
+ if (size & 0xFF)
+ return 0; /* success */
+
+ if (readl(FL4ECCCR(flctl)) & _4ECCFA)
+ return 1; /* can't correct */
+
+ udelay(1);
+ if (!(readl(FL4ECCCR(flctl)) & _4ECCEND))
+ continue;
+
+ /* start error correction */
+ ecc_reg[0] = FL4ECCRESULT0(flctl);
+ ecc_reg[1] = FL4ECCRESULT1(flctl);
+ ecc_reg[2] = FL4ECCRESULT2(flctl);
+ ecc_reg[3] = FL4ECCRESULT3(flctl);
+
+ for (i = 0; i < 3; i++) {
+ data = readl(ecc_reg[i]);
+ if (data != INIT_FL4ECCRESULT_VAL && !checked[i]) {
+ uint8_t org;
+ int index;
+
+ index = data >> 16;
+ org = flctl->done_buff[index];
+ flctl->done_buff[index] = org ^ (data & 0xFF);
+ checked[i] = 1;
+ }
+ }
+
+ writel(0, FL4ECCCR(flctl));
+ }
+
+ printk(KERN_ERR "wait_recfifo_ready(): Timeout occured \n");
+ return 1; /* timeout */
+}
+
+static void wait_wecfifo_ready(struct sh_flctl *flctl)
+{
+ uint32_t timeout = LOOP_TIMEOUT_MAX;
+ uint32_t len;
+
+ while (timeout--) {
+ /* check FLECFIFO */
+ len = (readl(FLDTCNTR(flctl)) >> 24) & 0xFF;
+ if (len >= 4)
+ return;
+ udelay(1);
+ }
+ printk(KERN_ERR "wait_wecfifo_ready(): Timeout occured \n");
+}
+
+static void read_datareg(struct sh_flctl *flctl, int offset)
+{
+ unsigned long data;
+ unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
+
+ wait_completion(flctl);
+
+ data = readl(FLDATAR(flctl));
+ *buf = le32_to_cpu(data);
+}
+
+static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+{
+ int i, len_4align;
+ unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
+ void *fifo_addr = (void *)FLDTFIFO(flctl);
+
+ len_4align = (rlen + 3) / 4;
+
+ for (i = 0; i < len_4align; i++) {
+ wait_rfifo_ready(flctl);
+ buf[i] = readl(fifo_addr);
+ buf[i] = be32_to_cpu(buf[i]);
+ }
+}
+
+static int read_ecfiforeg(struct sh_flctl *flctl, uint8_t *buff)
+{
+ int i;
+ unsigned long *ecc_buf = (unsigned long *)buff;
+ void *fifo_addr = (void *)FLECFIFO(flctl);
+
+ for (i = 0; i < 4; i++) {
+ if (wait_recfifo_ready(flctl))
+ return 1;
+ ecc_buf[i] = readl(fifo_addr);
+ ecc_buf[i] = be32_to_cpu(ecc_buf[i]);
+ }
+
+ return 0;
+}
+
+static void write_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+{
+ int i, len_4align;
+ unsigned long *data = (unsigned long *)&flctl->done_buff[offset];
+ void *fifo_addr = (void *)FLDTFIFO(flctl);
+
+ len_4align = (rlen + 3) / 4;
+ for (i = 0; i < len_4align; i++) {
+ wait_wfifo_ready(flctl);
+ writel(cpu_to_be32(data[i]), fifo_addr);
+ }
+}
+
+static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_val)
+{
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+ uint32_t flcmncr_val = readl(FLCMNCR(flctl));
+ uint32_t flcmdcr_val, addr_len_bytes = 0;
+
+ /* Set SNAND bit if page size is 2048 bytes */
+ if (flctl->page_size)
+ flcmncr_val |= SNAND_E;
+ else
+ flcmncr_val &= ~SNAND_E;
+
+ /* default FLCMDCR val */
+ flcmdcr_val = DOCMD1_E | DOADR_E;
+
+ /* Set for FLCMDCR */
+ switch (cmd) {
+ case NAND_CMD_ERASE1:
+ addr_len_bytes = flctl->erase_ADRCNT;
+ flcmdcr_val |= DOCMD2_E;
+ break;
+ case NAND_CMD_READ0:
+ case NAND_CMD_READOOB:
+ addr_len_bytes = flctl->rw_ADRCNT;
+ flcmdcr_val |= CDSRC_E;
+ break;
+ case NAND_CMD_SEQIN:
+ /* This path is taken when cmd is READ0, READ1 or READ00 */
+ flcmdcr_val &= ~DOADR_E; /* ONLY execute 1st cmd */
+ break;
+ case NAND_CMD_PAGEPROG:
+ addr_len_bytes = flctl->rw_ADRCNT;
+ flcmdcr_val |= DOCMD2_E | CDSRC_E | SELRW;
+ break;
+ case NAND_CMD_READID:
+ flcmncr_val &= ~SNAND_E;
+ addr_len_bytes = ADRCNT_1;
+ break;
+ case NAND_CMD_STATUS:
+ case NAND_CMD_RESET:
+ flcmncr_val &= ~SNAND_E;
+ flcmdcr_val &= ~(DOADR_E | DOSR_E);
+ break;
+ default:
+ break;
+ }
+
+ /* Set address bytes parameter */
+ flcmdcr_val |= addr_len_bytes;
+
+ /* Now actually write */
+ writel(flcmncr_val, FLCMNCR(flctl));
+ writel(flcmdcr_val, FLCMDCR(flctl));
+ writel(flcmcdr_val, FLCMCDR(flctl));
+}
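+
+/*
+ * Worked example (sketch): for a 2k-page full-page read the callers below
+ * issue set_cmd_regs(mtd, NAND_CMD_READ0,
+ *		      (NAND_CMD_READSTART << 8) | NAND_CMD_READ0),
+ * which leaves SNAND_E set in FLCMNCR, DOCMD1_E | DOADR_E | CDSRC_E plus
+ * the rw_ADRCNT address-cycle count in FLCMDCR, and 0x3000 in FLCMCDR.
+ */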
+
+static int flctl_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
+ uint8_t *buf)
+{
+ int i, eccsize = chip->ecc.size;
+ int eccbytes = chip->ecc.bytes;
+ int eccsteps = chip->ecc.steps;
+ uint8_t *p = buf;
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+
+ for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
+ chip->read_buf(mtd, p, eccsize);
+
+ /* eccsteps was consumed by the read loop above; reset it for the
+ per-sector check (one hwecc_cant_correct entry per sector) */
+ eccsteps = chip->ecc.steps;
+ for (i = 0; eccsteps; eccsteps--, i++) {
+ if (flctl->hwecc_cant_correct[i])
+ mtd->ecc_stats.failed++;
+ else
+ mtd->ecc_stats.corrected += 0;
+ }
+
+ return 0;
+}
+
+static void flctl_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
+ const uint8_t *buf)
+{
+ int i, eccsize = chip->ecc.size;
+ int eccbytes = chip->ecc.bytes;
+ int eccsteps = chip->ecc.steps;
+ const uint8_t *p = buf;
+
+ for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
+ chip->write_buf(mtd, p, eccsize);
+}
+
+static void execmd_read_page_sector(struct mtd_info *mtd, int page_addr)
+{
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+ int sector, page_sectors;
+
+ if (flctl->page_size)
+ page_sectors = 4;
+ else
+ page_sectors = 1;
+
+ writel(readl(FLCMNCR(flctl)) | ACM_SACCES_MODE | _4ECCCORRECT,
+ FLCMNCR(flctl));
+
+ set_cmd_regs(mtd, NAND_CMD_READ0,
+ (NAND_CMD_READSTART << 8) | NAND_CMD_READ0);
+
+ for (sector = 0; sector < page_sectors; sector++) {
+ int ret;
+
+ empty_fifo(flctl);
+ writel(readl(FLCMDCR(flctl)) | 1, FLCMDCR(flctl));
+ writel(page_addr << 2 | sector, FLADR(flctl));
+
+ start_translation(flctl);
+ read_fiforeg(flctl, 512, 512 * sector);
+
+ ret = read_ecfiforeg(flctl,
+ &flctl->done_buff[mtd->writesize + 16 * sector]);
+
+ if (ret)
+ flctl->hwecc_cant_correct[sector] = 1;
+
+ writel(0x0, FL4ECCCR(flctl));
+ wait_completion(flctl);
+ }
+ writel(readl(FLCMNCR(flctl)) & ~(ACM_SACCES_MODE | _4ECCCORRECT),
+ FLCMNCR(flctl));
+}
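+
+/*
+ * Resulting done_buff layout for a 2k page (sketch; 4 sectors of 512+16):
+ *
+ *	0..2047     main data, 512 bytes per sector
+ *	2048..2111  ECC/OOB data, 16 bytes per sector
+ *
+ * Single-bit corrections are applied to done_buff in place by
+ * wait_recfifo_ready() while the sectors are being read.
+ */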
+
+static void execmd_read_oob(struct mtd_info *mtd, int page_addr)
+{
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+
+ set_cmd_regs(mtd, NAND_CMD_READ0,
+ (NAND_CMD_READSTART << 8) | NAND_CMD_READ0);
+
+ empty_fifo(flctl);
+ if (flctl->page_size) {
+ int i;
+ /* In case the page size is 2k */
+ for (i = 0; i < 16 * 3; i++)
+ flctl->done_buff[i] = 0xFF;
+
+ set_addr(mtd, 3 * 528 + 512, page_addr);
+ writel(16, FLDTCNTR(flctl));
+
+ start_translation(flctl);
+ read_fiforeg(flctl, 16, 16 * 3);
+ wait_completion(flctl);
+ } else {
+ /* In case the page size is 512b */
+ set_addr(mtd, 512, page_addr);
+ writel(16, FLDTCNTR(flctl));
+
+ start_translation(flctl);
+ read_fiforeg(flctl, 16, 0);
+ wait_completion(flctl);
+ }
+}
+
+static void execmd_write_page_sector(struct mtd_info *mtd)
+{
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+ int i, page_addr = flctl->seqin_page_addr;
+ int sector, page_sectors;
+
+ if (flctl->page_size)
+ page_sectors = 4;
+ else
+ page_sectors = 1;
+
+ writel(readl(FLCMNCR(flctl)) | ACM_SACCES_MODE, FLCMNCR(flctl));
+
+ set_cmd_regs(mtd, NAND_CMD_PAGEPROG,
+ (NAND_CMD_PAGEPROG << 8) | NAND_CMD_SEQIN);
+
+ for (sector = 0; sector < page_sectors; sector++) {
+ empty_fifo(flctl);
+ writel(readl(FLCMDCR(flctl)) | 1, FLCMDCR(flctl));
+ writel(page_addr << 2 | sector, FLADR(flctl));
+
+ start_translation(flctl);
+ write_fiforeg(flctl, 512, 512 * sector);
+
+ for (i = 0; i < 4; i++) {
+ wait_wecfifo_ready(flctl); /* wait for write ready */
+ writel(0xFFFFFFFF, FLECFIFO(flctl));
+ }
+ wait_completion(flctl);
+ }
+
+ writel(readl(FLCMNCR(flctl)) & ~ACM_SACCES_MODE, FLCMNCR(flctl));
+}
+
+static void execmd_write_oob(struct mtd_info *mtd)
+{
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+ int page_addr = flctl->seqin_page_addr;
+ int sector, page_sectors;
+
+ if (flctl->page_size) {
+ sector = 3;
+ page_sectors = 4;
+ } else {
+ sector = 0;
+ page_sectors = 1;
+ }
+
+ set_cmd_regs(mtd, NAND_CMD_PAGEPROG,
+ (NAND_CMD_PAGEPROG << 8) | NAND_CMD_SEQIN);
+
+ for (; sector < page_sectors; sector++) {
+ empty_fifo(flctl);
+ set_addr(mtd, sector * 528 + 512, page_addr);
+ writel(16, FLDTCNTR(flctl)); /* set write size */
+
+ start_translation(flctl);
+ write_fiforeg(flctl, 16, 16 * sector);
+ wait_completion(flctl);
+ }
+}
+
+static void flctl_cmdfunc(struct mtd_info *mtd, unsigned int command,
+ int column, int page_addr)
+{
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+ uint32_t read_cmd = 0;
+
+ flctl->read_bytes = 0;
+ if (command != NAND_CMD_PAGEPROG)
+ flctl->index = 0;
+
+ switch (command) {
+ case NAND_CMD_READ1:
+ case NAND_CMD_READ0:
+ if (flctl->hwecc) {
+ /* read page with hwecc */
+ execmd_read_page_sector(mtd, page_addr);
+ break;
+ }
+ empty_fifo(flctl);
+ if (flctl->page_size)
+ set_cmd_regs(mtd, command, (NAND_CMD_READSTART << 8)
+ | command);
+ else
+ set_cmd_regs(mtd, command, command);
+
+ set_addr(mtd, 0, page_addr);
+
+ flctl->read_bytes = mtd->writesize + mtd->oobsize;
+ flctl->index += column;
+ goto read_normal_exit;
+
+ case NAND_CMD_READOOB:
+ if (flctl->hwecc) {
+ /* read page with hwecc */
+ execmd_read_oob(mtd, page_addr);
+ break;
+ }
+
+ empty_fifo(flctl);
+ if (flctl->page_size) {
+ set_cmd_regs(mtd, command, (NAND_CMD_READSTART << 8)
+ | NAND_CMD_READ0);
+ set_addr(mtd, mtd->writesize, page_addr);
+ } else {
+ set_cmd_regs(mtd, command, command);
+ set_addr(mtd, 0, page_addr);
+ }
+ flctl->read_bytes = mtd->oobsize;
+ goto read_normal_exit;
+
+ case NAND_CMD_READID:
+ empty_fifo(flctl);
+ set_cmd_regs(mtd, command, command);
+ set_addr(mtd, 0, 0);
+
+ flctl->read_bytes = 4;
+ writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
+ start_translation(flctl);
+ read_datareg(flctl, 0); /* read and end */
+ break;
+
+ case NAND_CMD_ERASE1:
+ flctl->erase1_page_addr = page_addr;
+ break;
+
+ case NAND_CMD_ERASE2:
+ set_cmd_regs(mtd, NAND_CMD_ERASE1,
+ (command << 8) | NAND_CMD_ERASE1);
+ set_addr(mtd, -1, flctl->erase1_page_addr);
+ start_translation(flctl);
+ wait_completion(flctl);
+ break;
+
+ case NAND_CMD_SEQIN:
+ if (!flctl->page_size) {
+ /* output read command */
+ if (column >= mtd->writesize) {
+ column -= mtd->writesize;
+ read_cmd = NAND_CMD_READOOB;
+ } else if (column < 256) {
+ read_cmd = NAND_CMD_READ0;
+ } else {
+ column -= 256;
+ read_cmd = NAND_CMD_READ1;
+ }
+ }
+ flctl->seqin_column = column;
+ flctl->seqin_page_addr = page_addr;
+ flctl->seqin_read_cmd = read_cmd;
+ break;
+
+ case NAND_CMD_PAGEPROG:
+ empty_fifo(flctl);
+ if (!flctl->page_size) {
+ set_cmd_regs(mtd, NAND_CMD_SEQIN,
+ flctl->seqin_read_cmd);
+ set_addr(mtd, -1, -1);
+ writel(0, FLDTCNTR(flctl)); /* set 0 size */
+ start_translation(flctl);
+ wait_completion(flctl);
+ }
+ if (flctl->hwecc) {
+ /* write page with hwecc */
+ if (flctl->seqin_column == mtd->writesize)
+ execmd_write_oob(mtd);
+ else if (!flctl->seqin_column)
+ execmd_write_page_sector(mtd);
+ else
+ printk(KERN_ERR "Invalid address !?\n");
+ break;
+ }
+ set_cmd_regs(mtd, command, (command << 8) | NAND_CMD_SEQIN);
+ set_addr(mtd, flctl->seqin_column, flctl->seqin_page_addr);
+ writel(flctl->index, FLDTCNTR(flctl)); /* set write size */
+ start_translation(flctl);
+ write_fiforeg(flctl, flctl->index, 0);
+ wait_completion(flctl);
+ break;
+
+ case NAND_CMD_STATUS:
+ set_cmd_regs(mtd, command, command);
+ set_addr(mtd, -1, -1);
+
+ flctl->read_bytes = 1;
+ writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
+ start_translation(flctl);
+ read_datareg(flctl, 0); /* read and end */
+ break;
+
+ case NAND_CMD_RESET:
+ set_cmd_regs(mtd, command, command);
+ set_addr(mtd, -1, -1);
+
+ writel(0, FLDTCNTR(flctl)); /* set 0 size */
+ start_translation(flctl);
+ wait_completion(flctl);
+ break;
+
+ default:
+ break;
+ }
+ return;
+
+read_normal_exit:
+ writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
+ start_translation(flctl);
+ read_fiforeg(flctl, flctl->read_bytes, 0);
+ wait_completion(flctl);
+ return;
+}
+
+static void flctl_select_chip(struct mtd_info *mtd, int chipnr)
+{
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+ uint32_t flcmncr_val = readl(FLCMNCR(flctl));
+
+ switch (chipnr) {
+ case -1:
+ flcmncr_val &= ~CE0_ENABLE;
+ writel(flcmncr_val, FLCMNCR(flctl));
+ break;
+ case 0:
+ flcmncr_val |= CE0_ENABLE;
+ writel(flcmncr_val, FLCMNCR(flctl));
+ break;
+ default:
+ BUG();
+ }
+}
+
+static void flctl_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+{
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+ int i, index = flctl->index;
+
+ for (i = 0; i < len; i++)
+ flctl->done_buff[index + i] = buf[i];
+ flctl->index += len;
+}
+
+static uint8_t flctl_read_byte(struct mtd_info *mtd)
+{
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+ int index = flctl->index;
+ uint8_t data;
+
+ data = flctl->done_buff[index];
+ flctl->index++;
+ return data;
+}
+
+static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ buf[i] = flctl_read_byte(mtd);
+}
+
+static int flctl_verify_buf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ if (buf[i] != flctl_read_byte(mtd))
+ return -EFAULT;
+ return 0;
+}
+
+static void flctl_register_init(struct sh_flctl *flctl, unsigned long val)
+{
+ writel(val, FLCMNCR(flctl));
+}
+
+static int flctl_chip_init_tail(struct mtd_info *mtd)
+{
+ struct sh_flctl *flctl = mtd_to_flctl(mtd);
+ struct nand_chip *chip = &flctl->chip;
+
+ if (mtd->writesize == 512) {
+ flctl->page_size = 0;
+ if (chip->chipsize > (32 << 20)) {
+ /* bigger than 32MB */
+ flctl->rw_ADRCNT = ADRCNT_4;
+ flctl->erase_ADRCNT = ADRCNT_3;
+ } else if (chip->chipsize > (2 << 16)) {
+ /* bigger than 128KB */
+ flctl->rw_ADRCNT = ADRCNT_3;
+ flctl->erase_ADRCNT = ADRCNT_2;
+ } else {
+ flctl->rw_ADRCNT = ADRCNT_2;
+ flctl->erase_ADRCNT = ADRCNT_1;
+ }
+ } else {
+ flctl->page_size = 1;
+ if (chip->chipsize > (128 << 20)) {
+ /* bigger than 128MB */
+ flctl->rw_ADRCNT = ADRCNT2_E;
+ flctl->erase_ADRCNT = ADRCNT_3;
+ } else if (chip->chipsize > (8 << 16)) {
+ /* bigger than 512KB */
+ flctl->rw_ADRCNT = ADRCNT_4;
+ flctl->erase_ADRCNT = ADRCNT_2;
+ } else {
+ flctl->rw_ADRCNT = ADRCNT_3;
+ flctl->erase_ADRCNT = ADRCNT_1;
+ }
+ }
+
+ if (flctl->hwecc) {
+ if (mtd->writesize == 512) {
+ chip->ecc.layout = &flctl_4secc_oob_16;
+ chip->badblock_pattern = &flctl_4secc_smallpage;
+ } else {
+ chip->ecc.layout = &flctl_4secc_oob_64;
+ chip->badblock_pattern = &flctl_4secc_largepage;
+ }
+
+ chip->ecc.size = 512;
+ chip->ecc.bytes = 10;
+ chip->ecc.read_page = flctl_read_page_hwecc;
+ chip->ecc.write_page = flctl_write_page_hwecc;
+ chip->ecc.mode = NAND_ECC_HW;
+
+ /* 4 symbols ECC enabled */
+ writel(readl(FLCMNCR(flctl)) | _4ECCEN | ECCPOS2 | ECCPOS_02,
+ FLCMNCR(flctl));
+ } else {
+ chip->ecc.mode = NAND_ECC_SOFT;
+ }
+
+ return 0;
+}
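+
+/*
+ * Summary of the address-cycle selection above (sketch):
+ *
+ *	page   chipsize     rw_ADRCNT   erase_ADRCNT
+ *	512B   > 32MB       ADRCNT_4    ADRCNT_3
+ *	512B   > 128KB      ADRCNT_3    ADRCNT_2
+ *	512B   otherwise    ADRCNT_2    ADRCNT_1
+ *	2k     > 128MB      ADRCNT2_E   ADRCNT_3
+ *	2k     > 512KB      ADRCNT_4    ADRCNT_2
+ *	2k     otherwise    ADRCNT_3    ADRCNT_1
+ */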
+
+static int __init flctl_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ struct sh_flctl *flctl;
+ struct mtd_info *flctl_mtd;
+ struct nand_chip *nand;
+ struct sh_flctl_platform_data *pdata;
+ int ret;
+
+ pdata = pdev->dev.platform_data;
+ if (pdata == NULL) {
+ printk(KERN_ERR "sh_flctl platform_data not found.\n");
+ return -ENODEV;
+ }
+
+ flctl = kzalloc(sizeof(struct sh_flctl), GFP_KERNEL);
+ if (!flctl) {
+ printk(KERN_ERR "Unable to allocate NAND MTD dev structure.\n");
+ return -ENOMEM;
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ printk(KERN_ERR "%s: resource not found.\n", __func__);
+ ret = -ENODEV;
+ goto err;
+ }
+
+ flctl->reg = ioremap(res->start, res->end - res->start + 1);
+ if (flctl->reg == NULL) {
+ printk(KERN_ERR "%s: ioremap error.\n", __func__);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ platform_set_drvdata(pdev, flctl);
+ flctl_mtd = &flctl->mtd;
+ nand = &flctl->chip;
+ flctl_mtd->priv = nand;
+ flctl->hwecc = pdata->has_hwecc;
+
+ flctl_register_init(flctl, pdata->flcmncr_val);
+
+ nand->options = NAND_NO_AUTOINCR;
+
+ /* Set address of hardware control function */
+ /* 20 us command delay time */
+ nand->chip_delay = 20;
+
+ nand->read_byte = flctl_read_byte;
+ nand->write_buf = flctl_write_buf;
+ nand->read_buf = flctl_read_buf;
+ nand->verify_buf = flctl_verify_buf;
+ nand->select_chip = flctl_select_chip;
+ nand->cmdfunc = flctl_cmdfunc;
+
+ ret = nand_scan_ident(flctl_mtd, 1);
+ if (ret)
+ goto err;
+
+ ret = flctl_chip_init_tail(flctl_mtd);
+ if (ret)
+ goto err;
+
+ ret = nand_scan_tail(flctl_mtd);
+ if (ret)
+ goto err;
+
+ add_mtd_partitions(flctl_mtd, pdata->parts, pdata->nr_parts);
+
+ return 0;
+
+err:
+ kfree(flctl);
+ return ret;
+}
+
+static int __exit flctl_remove(struct platform_device *pdev)
+{
+ struct sh_flctl *flctl = platform_get_drvdata(pdev);
+
+ nand_release(&flctl->mtd);
+ kfree(flctl);
+
+ return 0;
+}
+
+static struct platform_driver flctl_driver = {
+ .probe = flctl_probe,
+ .remove = flctl_remove,
+ .driver = {
+ .name = "sh_flctl",
+ .owner = THIS_MODULE,
+ },
+};
+
+static int __init flctl_nand_init(void)
+{
+ return platform_driver_register(&flctl_driver);
+}
+
+static void __exit flctl_nand_cleanup(void)
+{
+ platform_driver_unregister(&flctl_driver);
+}
+
+module_init(flctl_nand_init);
+module_exit(flctl_nand_cleanup);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yoshihiro Shimoda");
+MODULE_DESCRIPTION("SuperH FLCTL driver");
+MODULE_ALIAS("platform:sh_flctl");
diff --git a/drivers/mtd/nand/toto.c b/drivers/mtd/nand/toto.c
deleted file mode 100644
index bbf492e6830..00000000000
--- a/drivers/mtd/nand/toto.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * drivers/mtd/nand/toto.c
- *
- * Copyright (c) 2003 Texas Instruments
- *
- * Derived from drivers/mtd/autcpu12.c
- *
- * Copyright (c) 2002 Thomas Gleixner <tgxl@linutronix.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Overview:
- * This is a device driver for the NAND flash device found on the
- * TI fido board. It supports 32MiB and 64MiB cards
- */
-
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/nand.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/arch/hardware.h>
-#include <asm/sizes.h>
-#include <asm/arch/toto.h>
-#include <asm/arch-omap1510/hardware.h>
-#include <asm/arch/gpio.h>
-
-#define CONFIG_NAND_WORKAROUND 1
-
-/*
- * MTD structure for TOTO board
- */
-static struct mtd_info *toto_mtd = NULL;
-
-static unsigned long toto_io_base = OMAP_FLASH_1_BASE;
-
-/*
- * Define partitions for flash devices
- */
-
-static struct mtd_partition partition_info64M[] = {
- { .name = "toto kernel partition 1",
- .offset = 0,
- .size = 2 * SZ_1M },
- { .name = "toto file sys partition 2",
- .offset = 2 * SZ_1M,
- .size = 14 * SZ_1M },
- { .name = "toto user partition 3",
- .offset = 16 * SZ_1M,
- .size = 16 * SZ_1M },
- { .name = "toto devboard extra partition 4",
- .offset = 32 * SZ_1M,
- .size = 32 * SZ_1M },
-};
-
-static struct mtd_partition partition_info32M[] = {
- { .name = "toto kernel partition 1",
- .offset = 0,
- .size = 2 * SZ_1M },
- { .name = "toto file sys partition 2",
- .offset = 2 * SZ_1M,
- .size = 14 * SZ_1M },
- { .name = "toto user partition 3",
- .offset = 16 * SZ_1M,
- .size = 16 * SZ_1M },
-};
-
-#define NUM_PARTITIONS32M 3
-#define NUM_PARTITIONS64M 4
-
-/*
- * hardware specific access to control-lines
- *
- * ctrl:
- * NAND_NCE: bit 0 -> bit 14 (0x4000)
- * NAND_CLE: bit 1 -> bit 12 (0x1000)
- * NAND_ALE: bit 2 -> bit 1 (0x0002)
- */
-static void toto_hwcontrol(struct mtd_info *mtd, int cmd,
- unsigned int ctrl)
-{
- struct nand_chip *chip = mtd->priv;
-
- if (ctrl & NAND_CTRL_CHANGE) {
- unsigned long bits;
-
- /* hopefully enough time for the preceding write to clear */
- udelay(1);
-
- bits = (~ctrl & NAND_NCE) << 14;
- bits |= (ctrl & NAND_CLE) << 12;
- bits |= (ctrl & NAND_ALE) >> 1;
-
-#warning Wild guess as gpiosetout() is nowhere defined in the kernel source - tglx
- gpiosetout(0x5002, bits);
-
-#ifdef CONFIG_NAND_WORKAROUND
- /* "some" dev boards busted, blue wired to rts2 :( */
- rts2setout(2, (ctrl & NAND_CLE) << 1);
-#endif
- /* allow time for the gpio state to overtake the memory write */
- udelay(1);
- }
-
- if (cmd != NAND_CMD_NONE)
- writeb(cmd, chip->IO_ADDR_W);
-}
-
-/*
- * Main initialization routine
- */
-static int __init toto_init(void)
-{
- struct nand_chip *this;
- int err = 0;
-
- /* Allocate memory for MTD device structure and private data */
- toto_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL);
- if (!toto_mtd) {
- printk(KERN_WARNING "Unable to allocate toto NAND MTD device structure.\n");
- err = -ENOMEM;
- goto out;
- }
-
- /* Get pointer to private data */
- this = (struct nand_chip *)(&toto_mtd[1]);
-
- /* Initialize structures */
- memset(toto_mtd, 0, sizeof(struct mtd_info));
- memset(this, 0, sizeof(struct nand_chip));
-
- /* Link the private data with the MTD structure */
- toto_mtd->priv = this;
- toto_mtd->owner = THIS_MODULE;
-
- /* Set address of NAND IO lines */
- this->IO_ADDR_R = toto_io_base;
- this->IO_ADDR_W = toto_io_base;
- this->cmd_ctrl = toto_hwcontrol;
- this->dev_ready = NULL;
- /* 30 us command delay time */
- this->chip_delay = 30;
- this->ecc.mode = NAND_ECC_SOFT;
-
- /* Scan to find existence of the device */
- if (nand_scan(toto_mtd, 1)) {
- err = -ENXIO;
- goto out_mtd;
- }
-
- /* Register the partitions */
- switch (toto_mtd->size) {
- case SZ_64M:
- add_mtd_partitions(toto_mtd, partition_info64M, NUM_PARTITIONS64M);
- break;
- case SZ_32M:
- add_mtd_partitions(toto_mtd, partition_info32M, NUM_PARTITIONS32M);
- break;
- default:{
- printk(KERN_WARNING "Unsupported Nand device\n");
- err = -ENXIO;
- goto out_buf;
- }
- }
-
- gpioreserve(NAND_MASK); /* claim our gpios */
- archflashwp(0, 0); /* open up flash for writing */
-
- goto out;
-
- out_mtd:
- kfree(toto_mtd);
- out:
- return err;
-}
-
-module_init(toto_init);
-
-/*
- * Clean up routine
- */
-static void __exit toto_cleanup(void)
-{
- /* Release resources, unregister device */
- nand_release(toto_mtd);
-
- /* Free the MTD device structure */
- kfree(toto_mtd);
-
- /* stop flash writes */
- archflashwp(0, 1);
-
- /* release gpios to system */
- gpiorelease(NAND_MASK);
-}
-
-module_exit(toto_cleanup);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Richard Woodruff <r-woodruff2@ti.com>");
-MODULE_DESCRIPTION("Glue layer for NAND flash on toto board");
diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
index 4f80c2fd89a..9e45b3f39c0 100644
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -20,7 +20,6 @@
#include <linux/mtd/partitions.h>
int __devinit of_mtd_parse_partitions(struct device *dev,
- struct mtd_info *mtd,
struct device_node *node,
struct mtd_partition **pparts)
{
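
The dropped mtd argument was unused inside of_mtd_parse_partitions(); callers now pass only the device, the OF node, and an out-pointer for the partition array. A hedged sketch of a caller under the new signature (the wrapper name is hypothetical):

    /* hypothetical wrapper; error handling trimmed to the essentials */
    static int board_attach_of_parts(struct device *dev,
                                     struct device_node *np,
                                     struct mtd_info *mtd)
    {
        struct mtd_partition *parts;
        int nr;

        /* returns the number of partitions found, 0, or a -errno */
        nr = of_mtd_parse_partitions(dev, np, &parts);
        if (nr <= 0)
            return nr;

        return add_mtd_partitions(mtd, parts, nr);
    }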
diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig
index cb41cbca64f..79fa79e8f8d 100644
--- a/drivers/mtd/onenand/Kconfig
+++ b/drivers/mtd/onenand/Kconfig
@@ -27,8 +27,16 @@ config MTD_ONENAND_GENERIC
help
Support for OneNAND flash via platform device driver.
+config MTD_ONENAND_OMAP2
+ tristate "OneNAND on OMAP2/OMAP3 support"
+ depends on MTD_ONENAND && (ARCH_OMAP2 || ARCH_OMAP3)
+ help
+ Support for a OneNAND flash device connected to an OMAP2/OMAP3 CPU
+ via the GPMC memory controller.
+
config MTD_ONENAND_OTP
bool "OneNAND OTP Support"
+ select HAVE_MTD_OTP
help
One Block of the NAND Flash Array memory is reserved as
a One-Time Programmable Block memory area.
diff --git a/drivers/mtd/onenand/Makefile b/drivers/mtd/onenand/Makefile
index 4d2eacfd7e1..64b6cc61a52 100644
--- a/drivers/mtd/onenand/Makefile
+++ b/drivers/mtd/onenand/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_MTD_ONENAND) += onenand.o
# Board specific.
obj-$(CONFIG_MTD_ONENAND_GENERIC) += generic.o
+obj-$(CONFIG_MTD_ONENAND_OMAP2) += omap2.o
# Simulator
obj-$(CONFIG_MTD_ONENAND_SIM) += onenand_sim.o
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
new file mode 100644
index 00000000000..8387e05daae
--- /dev/null
+++ b/drivers/mtd/onenand/omap2.c
@@ -0,0 +1,801 @@
+/*
+ * linux/drivers/mtd/onenand/omap2.c
+ *
+ * OneNAND driver for OMAP2 / OMAP3
+ *
+ * Copyright © 2005-2006 Nokia Corporation
+ *
+ * Author: Jarkko Lavinen <jarkko.lavinen@nokia.com> and Juha Yrjölä
+ * IRQ and DMA support written by Timo Teras
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; see the file COPYING. If not, write to the Free Software
+ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/onenand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include <asm/io.h>
+#include <asm/mach/flash.h>
+#include <asm/arch/gpmc.h>
+#include <asm/arch/onenand.h>
+#include <asm/arch/gpio.h>
+#include <asm/arch/pm.h>
+
+#include <linux/dma-mapping.h>
+#include <asm/dma-mapping.h>
+#include <asm/arch/dma.h>
+
+#include <asm/arch/board.h>
+
+#define DRIVER_NAME "omap2-onenand"
+
+#define ONENAND_IO_SIZE SZ_128K
+#define ONENAND_BUFRAM_SIZE (1024 * 5)
+
+struct omap2_onenand {
+ struct platform_device *pdev;
+ int gpmc_cs;
+ unsigned long phys_base;
+ int gpio_irq;
+ struct mtd_info mtd;
+ struct mtd_partition *parts;
+ struct onenand_chip onenand;
+ struct completion irq_done;
+ struct completion dma_done;
+ int dma_channel;
+ int freq;
+ int (*setup)(void __iomem *base, int freq);
+};
+
+static void omap2_onenand_dma_cb(int lch, u16 ch_status, void *data)
+{
+ struct omap2_onenand *c = data;
+
+ complete(&c->dma_done);
+}
+
+static irqreturn_t omap2_onenand_interrupt(int irq, void *dev_id)
+{
+ struct omap2_onenand *c = dev_id;
+
+ complete(&c->irq_done);
+
+ return IRQ_HANDLED;
+}
+
+static inline unsigned short read_reg(struct omap2_onenand *c, int reg)
+{
+ return readw(c->onenand.base + reg);
+}
+
+static inline void write_reg(struct omap2_onenand *c, unsigned short value,
+ int reg)
+{
+ writew(value, c->onenand.base + reg);
+}
+
+static void wait_err(char *msg, int state, unsigned int ctrl, unsigned int intr)
+{
+ printk(KERN_ERR "onenand_wait: %s! state %d ctrl 0x%04x intr 0x%04x\n",
+ msg, state, ctrl, intr);
+}
+
+static void wait_warn(char *msg, int state, unsigned int ctrl,
+ unsigned int intr)
+{
+ printk(KERN_WARNING "onenand_wait: %s! state %d ctrl 0x%04x "
+ "intr 0x%04x\n", msg, state, ctrl, intr);
+}
+
+static int omap2_onenand_wait(struct mtd_info *mtd, int state)
+{
+ struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+ unsigned int intr = 0;
+ unsigned int ctrl;
+ unsigned long timeout;
+ u32 syscfg;
+
+ if (state == FL_RESETING) {
+ int i;
+
+ for (i = 0; i < 20; i++) {
+ udelay(1);
+ intr = read_reg(c, ONENAND_REG_INTERRUPT);
+ if (intr & ONENAND_INT_MASTER)
+ break;
+ }
+ ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+ if (ctrl & ONENAND_CTRL_ERROR) {
+ wait_err("controller error", state, ctrl, intr);
+ return -EIO;
+ }
+ if (!(intr & ONENAND_INT_RESET)) {
+ wait_err("timeout", state, ctrl, intr);
+ return -EIO;
+ }
+ return 0;
+ }
+
+ if (state != FL_READING) {
+ int result;
+
+ /* Turn interrupts on */
+ syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+ if (!(syscfg & ONENAND_SYS_CFG1_IOBE)) {
+ syscfg |= ONENAND_SYS_CFG1_IOBE;
+ write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
+ if (cpu_is_omap34xx())
+ /* Add a delay to let GPIO settle */
+ syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+ }
+
+ INIT_COMPLETION(c->irq_done);
+ if (c->gpio_irq) {
+ result = omap_get_gpio_datain(c->gpio_irq);
+ if (result == -1) {
+ ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+ intr = read_reg(c, ONENAND_REG_INTERRUPT);
+ wait_err("gpio error", state, ctrl, intr);
+ return -EIO;
+ }
+ } else
+ result = 0;
+ if (result == 0) {
+ int retry_cnt = 0;
+retry:
+ result = wait_for_completion_timeout(&c->irq_done,
+ msecs_to_jiffies(20));
+ if (result == 0) {
+ /* Timeout after 20ms */
+ ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+ if (ctrl & ONENAND_CTRL_ONGO) {
+ /*
+ * The operation seems to be still going
+ * so give it some more time.
+ */
+ retry_cnt += 1;
+ if (retry_cnt < 3)
+ goto retry;
+ intr = read_reg(c,
+ ONENAND_REG_INTERRUPT);
+ wait_err("timeout", state, ctrl, intr);
+ return -EIO;
+ }
+ intr = read_reg(c, ONENAND_REG_INTERRUPT);
+ if ((intr & ONENAND_INT_MASTER) == 0)
+ wait_warn("timeout", state, ctrl, intr);
+ }
+ }
+ } else {
+ int retry_cnt = 0;
+
+ /* Turn interrupts off */
+ syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+ syscfg &= ~ONENAND_SYS_CFG1_IOBE;
+ write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
+
+ timeout = jiffies + msecs_to_jiffies(20);
+ while (1) {
+ if (time_before(jiffies, timeout)) {
+ intr = read_reg(c, ONENAND_REG_INTERRUPT);
+ if (intr & ONENAND_INT_MASTER)
+ break;
+ } else {
+ /* Timeout after 20ms */
+ ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+ if (ctrl & ONENAND_CTRL_ONGO) {
+ /*
+ * The operation seems to be still going
+ * so give it some more time.
+ */
+ retry_cnt += 1;
+ if (retry_cnt < 3) {
+ timeout = jiffies +
+ msecs_to_jiffies(20);
+ continue;
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ intr = read_reg(c, ONENAND_REG_INTERRUPT);
+ ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+
+ if (intr & ONENAND_INT_READ) {
+ int ecc = read_reg(c, ONENAND_REG_ECC_STATUS);
+
+ if (ecc) {
+ unsigned int addr1, addr8;
+
+ addr1 = read_reg(c, ONENAND_REG_START_ADDRESS1);
+ addr8 = read_reg(c, ONENAND_REG_START_ADDRESS8);
+ if (ecc & ONENAND_ECC_2BIT_ALL) {
+ printk(KERN_ERR "onenand_wait: ECC error = "
+ "0x%04x, addr1 %#x, addr8 %#x\n",
+ ecc, addr1, addr8);
+ mtd->ecc_stats.failed++;
+ return -EBADMSG;
+ } else if (ecc & ONENAND_ECC_1BIT_ALL) {
+ printk(KERN_NOTICE "onenand_wait: correctable "
+ "ECC error = 0x%04x, addr1 %#x, "
+ "addr8 %#x\n", ecc, addr1, addr8);
+ mtd->ecc_stats.corrected++;
+ }
+ }
+ } else if (state == FL_READING) {
+ wait_err("timeout", state, ctrl, intr);
+ return -EIO;
+ }
+
+ if (ctrl & ONENAND_CTRL_ERROR) {
+ wait_err("controller error", state, ctrl, intr);
+ if (ctrl & ONENAND_CTRL_LOCK)
+ printk(KERN_ERR "onenand_wait: "
+ "Device is write protected!!!\n");
+ return -EIO;
+ }
+
+ if (ctrl & 0xFE9F)
+ wait_warn("unexpected controller status", state, ctrl, intr);
+
+ return 0;
+}
+
+static inline int omap2_onenand_bufferram_offset(struct mtd_info *mtd, int area)
+{
+ struct onenand_chip *this = mtd->priv;
+
+ if (ONENAND_CURRENT_BUFFERRAM(this)) {
+ if (area == ONENAND_DATARAM)
+ return mtd->writesize;
+ if (area == ONENAND_SPARERAM)
+ return mtd->oobsize;
+ }
+
+ return 0;
+}
+
+#if defined(CONFIG_ARCH_OMAP3) || defined(MULTI_OMAP2)
+
+static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area,
+ unsigned char *buffer, int offset,
+ size_t count)
+{
+ struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+ struct onenand_chip *this = mtd->priv;
+ dma_addr_t dma_src, dma_dst;
+ int bram_offset;
+ unsigned long timeout;
+ void *buf = (void *)buffer;
+ size_t xtra;
+ volatile unsigned *done;
+
+ bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+ if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
+ goto out_copy;
+
+ if (buf >= high_memory) {
+ struct page *p1;
+
+ if (((size_t)buf & PAGE_MASK) !=
+ ((size_t)(buf + count - 1) & PAGE_MASK))
+ goto out_copy;
+ p1 = vmalloc_to_page(buf);
+ if (!p1)
+ goto out_copy;
+ buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
+ }
+
+ xtra = count & 3;
+ if (xtra) {
+ count -= xtra;
+ memcpy(buf + count, this->base + bram_offset + count, xtra);
+ }
+
+ dma_src = c->phys_base + bram_offset;
+ dma_dst = dma_map_single(&c->pdev->dev, buf, count, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
+ dev_err(&c->pdev->dev,
+ "Couldn't DMA map a %d byte buffer\n",
+ count);
+ goto out_copy;
+ }
+
+ omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+ count >> 2, 1, 0, 0, 0);
+ omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+ dma_src, 0, 0);
+ omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+ dma_dst, 0, 0);
+
+ INIT_COMPLETION(c->dma_done);
+ omap_start_dma(c->dma_channel);
+
+ timeout = jiffies + msecs_to_jiffies(20);
+ done = &c->dma_done.done;
+ while (time_before(jiffies, timeout))
+ if (*done)
+ break;
+
+ dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
+
+ if (!*done) {
+ dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+ goto out_copy;
+ }
+
+ return 0;
+
+out_copy:
+ memcpy(buf, this->base + bram_offset, count);
+ return 0;
+}
+
+static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area,
+ const unsigned char *buffer,
+ int offset, size_t count)
+{
+ struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+ struct onenand_chip *this = mtd->priv;
+ dma_addr_t dma_src, dma_dst;
+ int bram_offset;
+ unsigned long timeout;
+ void *buf = (void *)buffer;
+ volatile unsigned *done;
+
+ bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+ if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
+ goto out_copy;
+
+ /* panic_write() may be in an interrupt context */
+ if (in_interrupt())
+ goto out_copy;
+
+ if (buf >= high_memory) {
+ struct page *p1;
+
+ if (((size_t)buf & PAGE_MASK) !=
+ ((size_t)(buf + count - 1) & PAGE_MASK))
+ goto out_copy;
+ p1 = vmalloc_to_page(buf);
+ if (!p1)
+ goto out_copy;
+ buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
+ }
+
+ dma_src = dma_map_single(&c->pdev->dev, buf, count, DMA_TO_DEVICE);
+ dma_dst = c->phys_base + bram_offset;
+ if (dma_mapping_error(&c->pdev->dev, dma_src)) {
+ dev_err(&c->pdev->dev,
+ "Couldn't DMA map a %d byte buffer\n",
+ count);
+ return -1;
+ }
+
+ omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+ count >> 2, 1, 0, 0, 0);
+ omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+ dma_src, 0, 0);
+ omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+ dma_dst, 0, 0);
+
+ INIT_COMPLETION(c->dma_done);
+ omap_start_dma(c->dma_channel);
+
+ timeout = jiffies + msecs_to_jiffies(20);
+ done = &c->dma_done.done;
+ while (time_before(jiffies, timeout))
+ if (*done)
+ break;
+
+ dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE);
+
+ if (!*done) {
+ dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+ goto out_copy;
+ }
+
+ return 0;
+
+out_copy:
+ memcpy(this->base + bram_offset, buf, count);
+ return 0;
+}
+
+#else
+
+int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area,
+ unsigned char *buffer, int offset,
+ size_t count);
+
+int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area,
+ const unsigned char *buffer,
+ int offset, size_t count);
+
+#endif
+
+#if defined(CONFIG_ARCH_OMAP2) || defined(MULTI_OMAP2)
+
+static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
+ unsigned char *buffer, int offset,
+ size_t count)
+{
+ struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+ struct onenand_chip *this = mtd->priv;
+ dma_addr_t dma_src, dma_dst;
+ int bram_offset;
+
+ bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+ /* DMA is not used. Revisit PM requirements before enabling it. */
+ if (1 || (c->dma_channel < 0) ||
+ ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) ||
+ (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) {
+ memcpy(buffer, (__force void *)(this->base + bram_offset),
+ count);
+ return 0;
+ }
+
+ dma_src = c->phys_base + bram_offset;
+ dma_dst = dma_map_single(&c->pdev->dev, buffer, count,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
+ dev_err(&c->pdev->dev,
+ "Couldn't DMA map a %d byte buffer\n",
+ count);
+ return -1;
+ }
+
+ omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+ count / 4, 1, 0, 0, 0);
+ omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+ dma_src, 0, 0);
+ omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+ dma_dst, 0, 0);
+
+ INIT_COMPLETION(c->dma_done);
+ omap_start_dma(c->dma_channel);
+ wait_for_completion(&c->dma_done);
+
+ dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
+
+ return 0;
+}
+
+static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
+ const unsigned char *buffer,
+ int offset, size_t count)
+{
+ struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+ struct onenand_chip *this = mtd->priv;
+ dma_addr_t dma_src, dma_dst;
+ int bram_offset;
+
+ bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+ /* DMA is not used. Revisit PM requirements before enabling it. */
+ if (1 || (c->dma_channel < 0) ||
+ ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) ||
+ (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) {
+ memcpy((__force void *)(this->base + bram_offset), buffer,
+ count);
+ return 0;
+ }
+
+ dma_src = dma_map_single(&c->pdev->dev, (void *) buffer, count,
+ DMA_TO_DEVICE);
+ dma_dst = c->phys_base + bram_offset;
+ if (dma_mapping_error(&c->pdev->dev, dma_src)) {
+ dev_err(&c->pdev->dev,
+ "Couldn't DMA map a %d byte buffer\n",
+ count);
+ return -1;
+ }
+
+ omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S16,
+ count / 2, 1, 0, 0, 0);
+ omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+ dma_src, 0, 0);
+ omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+ dma_dst, 0, 0);
+
+ INIT_COMPLETION(c->dma_done);
+ omap_start_dma(c->dma_channel);
+ wait_for_completion(&c->dma_done);
+
+ dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE);
+
+ return 0;
+}
+
+#else
+
+int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
+ unsigned char *buffer, int offset,
+ size_t count);
+
+int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
+ const unsigned char *buffer,
+ int offset, size_t count);
+
+#endif
+
+static struct platform_driver omap2_onenand_driver;
+
+static int __adjust_timing(struct device *dev, void *data)
+{
+ int ret = 0;
+ struct omap2_onenand *c;
+
+ c = dev_get_drvdata(dev);
+
+ BUG_ON(c->setup == NULL);
+
+ /* DMA is not in use so this is all that is needed */
+ /* Revisit for OMAP3! */
+ ret = c->setup(c->onenand.base, c->freq);
+
+ return ret;
+}
+
+int omap2_onenand_rephase(void)
+{
+ return driver_for_each_device(&omap2_onenand_driver.driver, NULL,
+ NULL, __adjust_timing);
+}
+
+static void __devexit omap2_onenand_shutdown(struct platform_device *pdev)
+{
+ struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
+
+ /* With certain content in the buffer RAM, the OMAP boot ROM code
+ * can recognize the flash chip incorrectly. Zero it out before
+ * soft reset.
+ */
+ memset((__force void *)c->onenand.base, 0, ONENAND_BUFRAM_SIZE);
+}
+
+static int __devinit omap2_onenand_probe(struct platform_device *pdev)
+{
+ struct omap_onenand_platform_data *pdata;
+ struct omap2_onenand *c;
+ int r;
+
+ pdata = pdev->dev.platform_data;
+ if (pdata == NULL) {
+ dev_err(&pdev->dev, "platform data missing\n");
+ return -ENODEV;
+ }
+
+ c = kzalloc(sizeof(struct omap2_onenand), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+
+ init_completion(&c->irq_done);
+ init_completion(&c->dma_done);
+ c->gpmc_cs = pdata->cs;
+ c->gpio_irq = pdata->gpio_irq;
+ c->dma_channel = pdata->dma_channel;
+ if (c->dma_channel < 0) {
+ /* if -1, don't use DMA */
+ c->gpio_irq = 0;
+ }
+
+ r = gpmc_cs_request(c->gpmc_cs, ONENAND_IO_SIZE, &c->phys_base);
+ if (r < 0) {
+ dev_err(&pdev->dev, "Cannot request GPMC CS\n");
+ goto err_kfree;
+ }
+
+ if (request_mem_region(c->phys_base, ONENAND_IO_SIZE,
+ pdev->dev.driver->name) == NULL) {
+ dev_err(&pdev->dev, "Cannot reserve memory region at 0x%08lx, "
+ "size: 0x%x\n", c->phys_base, ONENAND_IO_SIZE);
+ r = -EBUSY;
+ goto err_free_cs;
+ }
+ c->onenand.base = ioremap(c->phys_base, ONENAND_IO_SIZE);
+ if (c->onenand.base == NULL) {
+ r = -ENOMEM;
+ goto err_release_mem_region;
+ }
+
+ if (pdata->onenand_setup != NULL) {
+ r = pdata->onenand_setup(c->onenand.base, c->freq);
+ if (r < 0) {
+ dev_err(&pdev->dev, "Onenand platform setup failed: "
+ "%d\n", r);
+ goto err_iounmap;
+ }
+ c->setup = pdata->onenand_setup;
+ }
+
+ if (c->gpio_irq) {
+ if ((r = omap_request_gpio(c->gpio_irq)) < 0) {
+ dev_err(&pdev->dev, "Failed to request GPIO%d for "
+ "OneNAND\n", c->gpio_irq);
+ goto err_iounmap;
+ }
+ omap_set_gpio_direction(c->gpio_irq, 1);
+
+ if ((r = request_irq(OMAP_GPIO_IRQ(c->gpio_irq),
+ omap2_onenand_interrupt, IRQF_TRIGGER_RISING,
+ pdev->dev.driver->name, c)) < 0)
+ goto err_release_gpio;
+ }
+
+ if (c->dma_channel >= 0) {
+ r = omap_request_dma(0, pdev->dev.driver->name,
+ omap2_onenand_dma_cb, (void *) c,
+ &c->dma_channel);
+ if (r == 0) {
+ omap_set_dma_write_mode(c->dma_channel,
+ OMAP_DMA_WRITE_NON_POSTED);
+ omap_set_dma_src_data_pack(c->dma_channel, 1);
+ omap_set_dma_src_burst_mode(c->dma_channel,
+ OMAP_DMA_DATA_BURST_8);
+ omap_set_dma_dest_data_pack(c->dma_channel, 1);
+ omap_set_dma_dest_burst_mode(c->dma_channel,
+ OMAP_DMA_DATA_BURST_8);
+ } else {
+ dev_info(&pdev->dev,
+ "failed to allocate DMA for OneNAND, "
+ "using PIO instead\n");
+ c->dma_channel = -1;
+ }
+ }
+
+ dev_info(&pdev->dev, "initializing on CS%d, phys base 0x%08lx, virtual "
+ "base %p\n", c->gpmc_cs, c->phys_base,
+ c->onenand.base);
+
+ c->pdev = pdev;
+ c->mtd.name = pdev->dev.bus_id;
+ c->mtd.priv = &c->onenand;
+ c->mtd.owner = THIS_MODULE;
+
+ if (c->dma_channel >= 0) {
+ struct onenand_chip *this = &c->onenand;
+
+ this->wait = omap2_onenand_wait;
+ if (cpu_is_omap34xx()) {
+ this->read_bufferram = omap3_onenand_read_bufferram;
+ this->write_bufferram = omap3_onenand_write_bufferram;
+ } else {
+ this->read_bufferram = omap2_onenand_read_bufferram;
+ this->write_bufferram = omap2_onenand_write_bufferram;
+ }
+ }
+
+ if ((r = onenand_scan(&c->mtd, 1)) < 0)
+ goto err_release_dma;
+
+ switch ((c->onenand.version_id >> 4) & 0xf) {
+ case 0:
+ c->freq = 40;
+ break;
+ case 1:
+ c->freq = 54;
+ break;
+ case 2:
+ c->freq = 66;
+ break;
+ case 3:
+ c->freq = 83;
+ break;
+ }
+
+#ifdef CONFIG_MTD_PARTITIONS
+ if (pdata->parts != NULL)
+ r = add_mtd_partitions(&c->mtd, pdata->parts,
+ pdata->nr_parts);
+ else
+#endif
+ r = add_mtd_device(&c->mtd);
+ if (r < 0)
+ goto err_release_onenand;
+
+ platform_set_drvdata(pdev, c);
+
+ return 0;
+
+err_release_onenand:
+ onenand_release(&c->mtd);
+err_release_dma:
+ if (c->dma_channel != -1)
+ omap_free_dma(c->dma_channel);
+ if (c->gpio_irq)
+ free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c);
+err_release_gpio:
+ if (c->gpio_irq)
+ omap_free_gpio(c->gpio_irq);
+err_iounmap:
+ iounmap(c->onenand.base);
+err_release_mem_region:
+ release_mem_region(c->phys_base, ONENAND_IO_SIZE);
+err_free_cs:
+ gpmc_cs_free(c->gpmc_cs);
+err_kfree:
+ kfree(c);
+
+ return r;
+}
+
+static int __devexit omap2_onenand_remove(struct platform_device *pdev)
+{
+ struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
+
+ BUG_ON(c == NULL);
+
+#ifdef CONFIG_MTD_PARTITIONS
+ if (c->parts)
+ del_mtd_partitions(&c->mtd);
+ else
+ del_mtd_device(&c->mtd);
+#else
+ del_mtd_device(&c->mtd);
+#endif
+
+ onenand_release(&c->mtd);
+ if (c->dma_channel != -1)
+ omap_free_dma(c->dma_channel);
+ omap2_onenand_shutdown(pdev);
+ platform_set_drvdata(pdev, NULL);
+ if (c->gpio_irq) {
+ free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c);
+ omap_free_gpio(c->gpio_irq);
+ }
+ iounmap(c->onenand.base);
+ release_mem_region(c->phys_base, ONENAND_IO_SIZE);
+ kfree(c);
+
+ return 0;
+}
+
+static struct platform_driver omap2_onenand_driver = {
+ .probe = omap2_onenand_probe,
+ .remove = omap2_onenand_remove,
+ .shutdown = omap2_onenand_shutdown,
+ .driver = {
+ .name = DRIVER_NAME,
+ .owner = THIS_MODULE,
+ },
+};
+
+static int __init omap2_onenand_init(void)
+{
+ printk(KERN_INFO "OneNAND driver initializing\n");
+ return platform_driver_register(&omap2_onenand_driver);
+}
+
+static void __exit omap2_onenand_exit(void)
+{
+ platform_driver_unregister(&omap2_onenand_driver);
+}
+
+module_init(omap2_onenand_init);
+module_exit(omap2_onenand_exit);
+
+MODULE_ALIAS(DRIVER_NAME);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
+MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3");
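
As with the FLCTL driver above, binding is by platform name, so a board file provides a struct omap_onenand_platform_data. A minimal sketch, with field names taken from the pdata accesses in omap2_onenand_probe() and all values assumed for illustration:

    static struct omap_onenand_platform_data board_onenand_data = {
        .cs            = 0,    /* GPMC chip select wired to the OneNAND */
        .gpio_irq      = 65,   /* assumed INT gpio; probe treats 0 as "no IRQ" */
        .dma_channel   = -1,   /* -1 keeps PIO and clears gpio_irq, as above */
        .onenand_setup = NULL, /* optional GPMC timing callback */
    };

    static struct platform_device board_onenand_device = {
        .name = "omap2-onenand",   /* DRIVER_NAME above */
        .id   = -1,
        .dev  = {
            .platform_data = &board_onenand_data,
        },
    };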
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 926cf3a4135..90ed319f26e 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1794,7 +1794,7 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
return -EINVAL;
}
- instr->fail_addr = 0xffffffff;
+ instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
/* Grab the lock and see if the device is available */
onenand_get_device(mtd, FL_ERASING);
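
MTD_FAIL_ADDR_UNKNOWN replaces the bare 0xffffffff so the sentinel still works now that fail_addr is a 64-bit field; a sketch of how an erase callback might test it (the callback name is assumed):

    static void board_erase_callback(struct erase_info *instr)
    {
        if (instr->state == MTD_ERASE_FAILED &&
            instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
            printk(KERN_ERR "erase failed at 0x%llx\n",
                   (unsigned long long)instr->fail_addr);
    }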
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index a5f3d60047d..33a5d6ed6f1 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -321,8 +321,7 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
DEBUG(MTD_DEBUG_LEVEL1,
"SSFDC_RO: cis_block=%d,erase_size=%d,map_len=%d,n_zones=%d\n",
ssfdc->cis_block, ssfdc->erase_size, ssfdc->map_len,
- (ssfdc->map_len + MAX_PHYS_BLK_PER_ZONE - 1) /
- MAX_PHYS_BLK_PER_ZONE);
+ DIV_ROUND_UP(ssfdc->map_len, MAX_PHYS_BLK_PER_ZONE));
/* Set geometry */
ssfdc->heads = 16;
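
DIV_ROUND_UP() from <linux/kernel.h> expands to ((n) + (d) - 1) / (d), so the replacement above is purely cosmetic; a small sketch showing the equivalence:

    #include <linux/kernel.h>

    static unsigned int n_zones(unsigned int map_len, unsigned int per_zone)
    {
        /* the open-coded form and the macro are exactly equivalent */
        unsigned int a = (map_len + per_zone - 1) / per_zone;
        unsigned int b = DIV_ROUND_UP(map_len, per_zone);

        BUG_ON(a != b);    /* e.g. 1000/1024 -> 1, 2048/1024 -> 2 */
        return b;
    }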
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 03c759b4eeb..b30a0b83d7f 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -104,12 +104,9 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
struct ubi_volume_desc *desc;
int vol_id = iminor(inode) - 1, mode, ubi_num;
- lock_kernel();
ubi_num = ubi_major2num(imajor(inode));
- if (ubi_num < 0) {
- unlock_kernel();
+ if (ubi_num < 0)
return ubi_num;
- }
if (file->f_mode & FMODE_WRITE)
mode = UBI_READWRITE;
@@ -119,7 +116,6 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
dbg_gen("open volume %d, mode %d", vol_id, mode);
desc = ubi_open_volume(ubi_num, vol_id, mode);
- unlock_kernel();
if (IS_ERR(desc))
return PTR_ERR(desc);
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 967bb4406df..4f2daa5bbec 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -387,7 +387,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
pnum, vol_id, lnum, ec, sqnum, bitflips);
sv = add_volume(si, vol_id, pnum, vid_hdr);
- if (IS_ERR(sv) < 0)
+ if (IS_ERR(sv))
return PTR_ERR(sv);
if (si->max_sqnum < sqnum)
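
The fix works because IS_ERR() returns 0 or 1, never a negative value, so the old comparison with < 0 could never fire and encoded errors were silently ignored; the standard idiom is:

    #include <linux/err.h>

    sv = add_volume(si, vol_id, pnum, vid_hdr);
    if (IS_ERR(sv))             /* pointer encodes -ENOMEM and friends */
        return PTR_ERR(sv);     /* decode back to the negative errno */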
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 217d0e111b2..333c8941552 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -244,8 +244,8 @@ static int vtbl_check(const struct ubi_device *ubi,
}
if (reserved_pebs > ubi->good_peb_count) {
- dbg_err("too large reserved_pebs, good PEBs %d",
- ubi->good_peb_count);
+ dbg_err("too large reserved_pebs %d, good PEBs %d",
+ reserved_pebs, ubi->good_peb_count);
err = 9;
goto bad;
}
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index fc5f2dbf532..8b51e10b778 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -563,7 +563,7 @@ static int __iommu_flush_context(struct intel_iommu *iommu,
spin_unlock_irqrestore(&iommu->register_lock, flag);
- /* flush context entry will implictly flush write buffer */
+ /* flush context entry will implicitly flush write buffer */
return 0;
}
@@ -656,7 +656,7 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
- /* flush context entry will implictly flush write buffer */
+ /* flush iotlb entry will implicitly flush write buffer */
return 0;
}
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c9884bba22d..dbe9f39f443 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1358,11 +1358,10 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
return 0;
err_out:
- dev_warn(&pdev->dev, "BAR %d: can't reserve %s region [%#llx-%#llx]\n",
+ dev_warn(&pdev->dev, "BAR %d: can't reserve %s region %pR\n",
bar,
pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem",
- (unsigned long long)pci_resource_start(pdev, bar),
- (unsigned long long)pci_resource_end(pdev, bar));
+ &pdev->resource[bar]);
return -EBUSY;
}
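
The conversions in this and the following PCI hunks all rely on the %pR printk extension, which formats a struct resource * directly instead of two casted 64-bit fields; a minimal before/after sketch (pdev and bar assumed from context):

    struct resource *res = &pdev->resource[bar];

    /* before: cast both ends by hand */
    dev_warn(&pdev->dev, "can't reserve region [%#llx-%#llx]\n",
             (unsigned long long)res->start,
             (unsigned long long)res->end);

    /* after: vsprintf decodes the struct resource itself */
    dev_warn(&pdev->dev, "can't reserve region %pR\n", res);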
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index dd9161a054e..d3db8b24972 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -304,9 +304,8 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
} else {
res->start = l64;
res->end = l64 + sz64;
- printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: [%llx, %llx]\n",
- pci_name(dev), pos, (unsigned long long)res->start,
- (unsigned long long)res->end);
+ printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: %pR\n",
+ pci_name(dev), pos, res);
}
} else {
sz = pci_size(l, sz, mask);
@@ -316,9 +315,10 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
res->start = l;
res->end = l + sz;
- printk(KERN_DEBUG "PCI: %s reg %x %s: [%llx, %llx]\n", pci_name(dev),
- pos, (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio",
- (unsigned long long)res->start, (unsigned long long)res->end);
+ printk(KERN_DEBUG "PCI: %s reg %x %s: %pR\n",
+ pci_name(dev), pos,
+ (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio",
+ res);
}
out:
@@ -389,9 +389,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
res->start = base;
if (!res->end)
res->end = limit + 0xfff;
- printk(KERN_DEBUG "PCI: bridge %s io port: [%llx, %llx]\n",
- pci_name(dev), (unsigned long long) res->start,
- (unsigned long long) res->end);
+ printk(KERN_DEBUG "PCI: bridge %s io port: %pR\n",
+ pci_name(dev), res);
}
res = child->resource[1];
@@ -403,9 +402,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
res->start = base;
res->end = limit + 0xfffff;
- printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: [%llx, %llx]\n",
- pci_name(dev), (unsigned long long) res->start,
- (unsigned long long) res->end);
+ printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: %pR\n",
+ pci_name(dev), res);
}
res = child->resource[2];
@@ -441,9 +439,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
res->start = base;
res->end = limit + 0xfffff;
- printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: [%llx, %llx]\n",
- pci_name(dev), (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32",
- (unsigned long long) res->start, (unsigned long long) res->end);
+ printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: %pR\n",
+ pci_name(dev),
+ (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64":"32", res);
}
}
diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index bd5c0e03139..1f5f6143f35 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c
@@ -21,7 +21,7 @@
* between the ROM and other resources, so enabling it may disable access
* to MMIO registers or other card memory.
*/
-static int pci_enable_rom(struct pci_dev *pdev)
+int pci_enable_rom(struct pci_dev *pdev)
{
struct resource *res = pdev->resource + PCI_ROM_RESOURCE;
struct pci_bus_region region;
@@ -45,7 +45,7 @@ static int pci_enable_rom(struct pci_dev *pdev)
* Disable ROM decoding on a PCI device by turning off the last bit in the
* ROM BAR.
*/
-static void pci_disable_rom(struct pci_dev *pdev)
+void pci_disable_rom(struct pci_dev *pdev)
{
u32 rom_addr;
pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr);
@@ -260,3 +260,5 @@ void pci_cleanup_rom(struct pci_dev *pdev)
EXPORT_SYMBOL(pci_map_rom);
EXPORT_SYMBOL(pci_unmap_rom);
+EXPORT_SYMBOL_GPL(pci_enable_rom);
+EXPORT_SYMBOL_GPL(pci_disable_rom);
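
With the statics removed and the symbols exported, a driver that reads an option ROM by hand can bracket the access itself; a hedged sketch of the usual pattern:

    int rc;

    rc = pci_enable_rom(pdev);     /* sets PCI_ROM_ADDRESS_ENABLE in the BAR */
    if (rc)
        return rc;

    /* ... access the option ROM through its BAR mapping here ... */

    pci_disable_rom(pdev);         /* ROM decode can shadow MMIO, turn it off */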
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index d5e2106760f..471a429d7a2 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -356,10 +356,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
order = __ffs(align) - 20;
if (order > 11) {
dev_warn(&dev->dev, "BAR %d bad alignment %llx: "
- "%#016llx-%#016llx\n", i,
- (unsigned long long)align,
- (unsigned long long)r->start,
- (unsigned long long)r->end);
+ "%pR\n", i, (unsigned long long)align, r);
r->flags = 0;
continue;
}
@@ -539,11 +536,9 @@ static void pci_bus_dump_res(struct pci_bus *bus)
if (!res)
continue;
- printk(KERN_INFO "bus: %02x index %x %s: [%llx, %llx]\n",
- bus->number, i,
- (res->flags & IORESOURCE_IO) ? "io port" : "mmio",
- (unsigned long long) res->start,
- (unsigned long long) res->end);
+ printk(KERN_INFO "bus: %02x index %x %s: %pR\n",
+ bus->number, i,
+ (res->flags & IORESOURCE_IO) ? "io port" : "mmio", res);
}
}
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 1a5fc83c71b..d4b5c690eaa 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -49,10 +49,8 @@ void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
pcibios_resource_to_bus(dev, &region, res);
- dev_dbg(&dev->dev, "BAR %d: got res [%#llx-%#llx] bus [%#llx-%#llx] "
- "flags %#lx\n", resno,
- (unsigned long long)res->start,
- (unsigned long long)res->end,
+ dev_dbg(&dev->dev, "BAR %d: got res %pR bus [%#llx-%#llx] "
+ "flags %#lx\n", resno, res,
(unsigned long long)region.start,
(unsigned long long)region.end,
(unsigned long)res->flags);
@@ -114,13 +112,11 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
err = insert_resource(root, res);
if (err) {
- dev_err(&dev->dev, "BAR %d: %s of %s [%#llx-%#llx]\n",
+ dev_err(&dev->dev, "BAR %d: %s of %s %pR\n",
resource,
root ? "address space collision on" :
"no parent found for",
- dtype,
- (unsigned long long)res->start,
- (unsigned long long)res->end);
+ dtype, res);
}
return err;
@@ -139,9 +135,8 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
align = resource_alignment(res);
if (!align) {
dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus "
- "alignment) [%#llx-%#llx] flags %#lx\n",
- resno, (unsigned long long)res->start,
- (unsigned long long)res->end, res->flags);
+ "alignment) %pR flags %#lx\n",
+ resno, res, res->flags);
return -EINVAL;
}
@@ -162,11 +157,8 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
}
if (ret) {
- dev_err(&dev->dev, "BAR %d: can't allocate %s resource "
- "[%#llx-%#llx]\n", resno,
- res->flags & IORESOURCE_IO ? "I/O" : "mem",
- (unsigned long long)res->start,
- (unsigned long long)res->end);
+ dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n",
+ resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res);
} else {
res->flags &= ~IORESOURCE_STARTALIGN;
if (resno < PCI_BRIDGE_RESOURCES)
@@ -202,11 +194,8 @@ int pci_assign_resource_fixed(struct pci_dev *dev, int resno)
}
if (ret) {
- dev_err(&dev->dev, "BAR %d: can't allocate %s resource "
- "[%#llx-%#llx\n]", resno,
- res->flags & IORESOURCE_IO ? "I/O" : "mem",
- (unsigned long long)res->start,
- (unsigned long long)res->end);
+ dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n",
+ resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res);
} else if (resno < PCI_BRIDGE_RESOURCES) {
pci_update_resource(dev, res, resno);
}
@@ -237,9 +226,8 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
r_align = resource_alignment(r);
if (!r_align) {
dev_warn(&dev->dev, "BAR %d: bogus alignment "
- "[%#llx-%#llx] flags %#lx\n",
- i, (unsigned long long)r->start,
- (unsigned long long)r->end, r->flags);
+ "%pR flags %#lx\n",
+ i, r, r->flags);
continue;
}
for (list = head; ; list = list->next) {
@@ -287,9 +275,7 @@ int pci_enable_resources(struct pci_dev *dev, int mask)
if (!r->parent) {
dev_err(&dev->dev, "device not available because of "
- "BAR %d [%#llx-%#llx] collisions\n", i,
- (unsigned long long) r->start,
- (unsigned long long) r->end);
+ "BAR %d %pR collisions\n", i, r);
return -EINVAL;
}
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index fe2aeb11939..23ae8460f5c 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -30,7 +30,7 @@
#define POWER_SUPPLY_ATTR(_name) \
{ \
- .attr = { .name = #_name, .mode = 0444, .owner = THIS_MODULE }, \
+ .attr = { .name = #_name, .mode = 0444 }, \
.show = power_supply_show_property, \
.store = NULL, \
}
diff --git a/drivers/ps3/ps3av.c b/drivers/ps3/ps3av.c
index 6f2f90ebb02..06848b254d5 100644
--- a/drivers/ps3/ps3av.c
+++ b/drivers/ps3/ps3av.c
@@ -915,6 +915,22 @@ int ps3av_video_mute(int mute)
EXPORT_SYMBOL_GPL(ps3av_video_mute);
+/* mute analog output only */
+int ps3av_audio_mute_analog(int mute)
+{
+ int i, res;
+
+ for (i = 0; i < ps3av->av_hw_conf.num_of_avmulti; i++) {
+ res = ps3av_cmd_av_audio_mute(1,
+ &ps3av->av_port[i + ps3av->av_hw_conf.num_of_hdmi],
+ mute);
+ if (res < 0)
+ return -1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ps3av_audio_mute_analog);
+
int ps3av_audio_mute(int mute)
{
return ps3av_set_audio_mute(mute ? PS3AV_CMD_MUTE_ON
diff --git a/drivers/ps3/ps3av_cmd.c b/drivers/ps3/ps3av_cmd.c
index 7f880c26122..11eb50318fe 100644
--- a/drivers/ps3/ps3av_cmd.c
+++ b/drivers/ps3/ps3av_cmd.c
@@ -660,9 +660,10 @@ u32 ps3av_cmd_set_av_audio_param(void *p, u32 port,
}
/* default cs val */
-static const u8 ps3av_mode_cs_info[] = {
+u8 ps3av_mode_cs_info[] = {
0x00, 0x09, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00
};
+EXPORT_SYMBOL_GPL(ps3av_mode_cs_info);
#define CS_44 0x00
#define CS_48 0x02
@@ -677,7 +678,7 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
u32 ch, u32 fs, u32 word_bits, u32 format,
u32 source)
{
- int spdif_through, spdif_bitstream;
+ int spdif_through;
int i;
if (!(ch | fs | format | word_bits | source)) {
@@ -687,7 +688,6 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
format = PS3AV_CMD_AUDIO_FORMAT_PCM;
source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
}
- spdif_through = spdif_bitstream = 0; /* XXX not supported */
/* audio mode */
memset(audio, 0, sizeof(*audio));
@@ -777,16 +777,17 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
break;
}
+ /* non-audio bit */
+ spdif_through = audio->audio_cs_info[0] & 0x02;
+
/* pass through setting */
if (spdif_through &&
(avport == PS3AV_CMD_AVPORT_SPDIF_0 ||
- avport == PS3AV_CMD_AVPORT_SPDIF_1)) {
+ avport == PS3AV_CMD_AVPORT_SPDIF_1 ||
+ avport == PS3AV_CMD_AVPORT_HDMI_0 ||
+ avport == PS3AV_CMD_AVPORT_HDMI_1)) {
audio->audio_word_bits = PS3AV_CMD_AUDIO_WORD_BITS_16;
- audio->audio_source = PS3AV_CMD_AUDIO_SOURCE_SPDIF;
- if (spdif_bitstream) {
- audio->audio_format = PS3AV_CMD_AUDIO_FORMAT_BITSTREAM;
- audio->audio_cs_info[0] |= CS_BIT;
- }
+ audio->audio_format = PS3AV_CMD_AUDIO_FORMAT_BITSTREAM;
}
}
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 37082616482..b5bf9370691 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -53,21 +53,21 @@ static void at91_rtc_decodetime(unsigned int timereg, unsigned int calreg,
} while ((time != at91_sys_read(timereg)) ||
(date != at91_sys_read(calreg)));
- tm->tm_sec = BCD2BIN((time & AT91_RTC_SEC) >> 0);
- tm->tm_min = BCD2BIN((time & AT91_RTC_MIN) >> 8);
- tm->tm_hour = BCD2BIN((time & AT91_RTC_HOUR) >> 16);
+ tm->tm_sec = bcd2bin((time & AT91_RTC_SEC) >> 0);
+ tm->tm_min = bcd2bin((time & AT91_RTC_MIN) >> 8);
+ tm->tm_hour = bcd2bin((time & AT91_RTC_HOUR) >> 16);
/*
* The Calendar Alarm register does not have a field for
* the year - so these will return an invalid value. When an
 * alarm is set, at91_alarm_year will store the current year.
*/
- tm->tm_year = BCD2BIN(date & AT91_RTC_CENT) * 100; /* century */
- tm->tm_year += BCD2BIN((date & AT91_RTC_YEAR) >> 8); /* year */
+ tm->tm_year = bcd2bin(date & AT91_RTC_CENT) * 100; /* century */
+ tm->tm_year += bcd2bin((date & AT91_RTC_YEAR) >> 8); /* year */
- tm->tm_wday = BCD2BIN((date & AT91_RTC_DAY) >> 21) - 1; /* day of the week [0-6], Sunday=0 */
- tm->tm_mon = BCD2BIN((date & AT91_RTC_MONTH) >> 16) - 1;
- tm->tm_mday = BCD2BIN((date & AT91_RTC_DATE) >> 24);
+ tm->tm_wday = bcd2bin((date & AT91_RTC_DAY) >> 21) - 1; /* day of the week [0-6], Sunday=0 */
+ tm->tm_mon = bcd2bin((date & AT91_RTC_MONTH) >> 16) - 1;
+ tm->tm_mday = bcd2bin((date & AT91_RTC_DATE) >> 24);
}
/*
@@ -106,16 +106,16 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm)
at91_sys_write(AT91_RTC_IDR, AT91_RTC_ACKUPD);
at91_sys_write(AT91_RTC_TIMR,
- BIN2BCD(tm->tm_sec) << 0
- | BIN2BCD(tm->tm_min) << 8
- | BIN2BCD(tm->tm_hour) << 16);
+ bin2bcd(tm->tm_sec) << 0
+ | bin2bcd(tm->tm_min) << 8
+ | bin2bcd(tm->tm_hour) << 16);
at91_sys_write(AT91_RTC_CALR,
- BIN2BCD((tm->tm_year + 1900) / 100) /* century */
- | BIN2BCD(tm->tm_year % 100) << 8 /* year */
- | BIN2BCD(tm->tm_mon + 1) << 16 /* tm_mon starts at zero */
- | BIN2BCD(tm->tm_wday + 1) << 21 /* day of the week [0-6], Sunday=0 */
- | BIN2BCD(tm->tm_mday) << 24);
+ bin2bcd((tm->tm_year + 1900) / 100) /* century */
+ | bin2bcd(tm->tm_year % 100) << 8 /* year */
+ | bin2bcd(tm->tm_mon + 1) << 16 /* tm_mon starts at zero */
+ | bin2bcd(tm->tm_wday + 1) << 21 /* day of the week [0-6], Sunday=0 */
+ | bin2bcd(tm->tm_mday) << 24);
/* Restart Time/Calendar */
cr = at91_sys_read(AT91_RTC_CR);
@@ -162,13 +162,13 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
at91_sys_write(AT91_RTC_IDR, AT91_RTC_ALARM);
at91_sys_write(AT91_RTC_TIMALR,
- BIN2BCD(tm.tm_sec) << 0
- | BIN2BCD(tm.tm_min) << 8
- | BIN2BCD(tm.tm_hour) << 16
+ bin2bcd(tm.tm_sec) << 0
+ | bin2bcd(tm.tm_min) << 8
+ | bin2bcd(tm.tm_hour) << 16
| AT91_RTC_HOUREN | AT91_RTC_MINEN | AT91_RTC_SECEN);
at91_sys_write(AT91_RTC_CALALR,
- BIN2BCD(tm.tm_mon + 1) << 16 /* tm_mon starts at zero */
- | BIN2BCD(tm.tm_mday) << 24
+ bin2bcd(tm.tm_mon + 1) << 16 /* tm_mon starts at zero */
+ | bin2bcd(tm.tm_mday) << 24
| AT91_RTC_DATEEN | AT91_RTC_MTHEN);
if (alrm->enabled) {
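
This and the following RTC patches switch from the BCD2BIN/BIN2BCD macros to the bcd2bin()/bin2bcd() functions from <linux/bcd.h>; unlike the old macros, the functions evaluate their argument only once, and the encoding itself is unchanged, one decimal digit per nibble:

    #include <linux/bcd.h>

    u8 raw = 0x47;                   /* tens in the high nibble, units low */
    unsigned int sec = bcd2bin(raw); /* (0x4 * 10) + 0x7 = 47 */
    u8 enc = bin2bcd(59);            /* (5 << 4) | 9 = 0x59 */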
diff --git a/drivers/rtc/rtc-bq4802.c b/drivers/rtc/rtc-bq4802.c
index 189a018bdf3..d00a274df8f 100644
--- a/drivers/rtc/rtc-bq4802.c
+++ b/drivers/rtc/rtc-bq4802.c
@@ -71,14 +71,14 @@ static int bq4802_read_time(struct device *dev, struct rtc_time *tm)
spin_unlock_irqrestore(&p->lock, flags);
- BCD_TO_BIN(tm->tm_sec);
- BCD_TO_BIN(tm->tm_min);
- BCD_TO_BIN(tm->tm_hour);
- BCD_TO_BIN(tm->tm_mday);
- BCD_TO_BIN(tm->tm_mon);
- BCD_TO_BIN(tm->tm_year);
- BCD_TO_BIN(tm->tm_wday);
- BCD_TO_BIN(century);
+ tm->tm_sec = bcd2bin(tm->tm_sec);
+ tm->tm_min = bcd2bin(tm->tm_min);
+ tm->tm_hour = bcd2bin(tm->tm_hour);
+ tm->tm_mday = bcd2bin(tm->tm_mday);
+ tm->tm_mon = bcd2bin(tm->tm_mon);
+ tm->tm_year = bcd2bin(tm->tm_year);
+ tm->tm_wday = bcd2bin(tm->tm_wday);
+ century = bcd2bin(century);
tm->tm_year += (century * 100);
tm->tm_year -= 1900;
@@ -106,13 +106,13 @@ static int bq4802_set_time(struct device *dev, struct rtc_time *tm)
min = tm->tm_min;
sec = tm->tm_sec;
- BIN_TO_BCD(sec);
- BIN_TO_BCD(min);
- BIN_TO_BCD(hrs);
- BIN_TO_BCD(day);
- BIN_TO_BCD(mon);
- BIN_TO_BCD(yrs);
- BIN_TO_BCD(century);
+ sec = bin2bcd(sec);
+ min = bin2bcd(min);
+ hrs = bin2bcd(hrs);
+ day = bin2bcd(day);
+ mon = bin2bcd(mon);
+ yrs = bin2bcd(yrs);
+ century = bin2bcd(century);
spin_lock_irqsave(&p->lock, flags);
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 963ad0b6a4e..5549231179a 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -143,6 +143,43 @@ static inline int hpet_unregister_irq_handler(irq_handler_t handler)
/*----------------------------------------------------------------*/
+#ifdef RTC_PORT
+
+/* Most newer x86 systems have two register banks, the first used
+ * for RTC and NVRAM and the second only for NVRAM. Caller must
+ * own rtc_lock ... and we won't worry about access during NMI.
+ */
+#define can_bank2 true
+
+static inline unsigned char cmos_read_bank2(unsigned char addr)
+{
+ outb(addr, RTC_PORT(2));
+ return inb(RTC_PORT(3));
+}
+
+static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
+{
+ outb(addr, RTC_PORT(2));
+ outb(val, RTC_PORT(2));
+}
+
+#else
+
+#define can_bank2 false
+
+static inline unsigned char cmos_read_bank2(unsigned char addr)
+{
+ return 0;
+}
+
+static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
+{
+}
+
+#endif
+
+/*----------------------------------------------------------------*/
+
static int cmos_read_time(struct device *dev, struct rtc_time *t)
{
/* REVISIT: if the clock has a "century" register, use
@@ -203,26 +240,26 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
/* REVISIT this assumes PC style usage: always BCD */
if (((unsigned)t->time.tm_sec) < 0x60)
- t->time.tm_sec = BCD2BIN(t->time.tm_sec);
+ t->time.tm_sec = bcd2bin(t->time.tm_sec);
else
t->time.tm_sec = -1;
if (((unsigned)t->time.tm_min) < 0x60)
- t->time.tm_min = BCD2BIN(t->time.tm_min);
+ t->time.tm_min = bcd2bin(t->time.tm_min);
else
t->time.tm_min = -1;
if (((unsigned)t->time.tm_hour) < 0x24)
- t->time.tm_hour = BCD2BIN(t->time.tm_hour);
+ t->time.tm_hour = bcd2bin(t->time.tm_hour);
else
t->time.tm_hour = -1;
if (cmos->day_alrm) {
if (((unsigned)t->time.tm_mday) <= 0x31)
- t->time.tm_mday = BCD2BIN(t->time.tm_mday);
+ t->time.tm_mday = bcd2bin(t->time.tm_mday);
else
t->time.tm_mday = -1;
if (cmos->mon_alrm) {
if (((unsigned)t->time.tm_mon) <= 0x12)
- t->time.tm_mon = BCD2BIN(t->time.tm_mon) - 1;
+ t->time.tm_mon = bcd2bin(t->time.tm_mon) - 1;
else
t->time.tm_mon = -1;
}
@@ -294,19 +331,19 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
/* Writing 0xff means "don't care" or "match all". */
mon = t->time.tm_mon + 1;
- mon = (mon <= 12) ? BIN2BCD(mon) : 0xff;
+ mon = (mon <= 12) ? bin2bcd(mon) : 0xff;
mday = t->time.tm_mday;
- mday = (mday >= 1 && mday <= 31) ? BIN2BCD(mday) : 0xff;
+ mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
hrs = t->time.tm_hour;
- hrs = (hrs < 24) ? BIN2BCD(hrs) : 0xff;
+ hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff;
min = t->time.tm_min;
- min = (min < 60) ? BIN2BCD(min) : 0xff;
+ min = (min < 60) ? bin2bcd(min) : 0xff;
sec = t->time.tm_sec;
- sec = (sec < 60) ? BIN2BCD(sec) : 0xff;
+ sec = (sec < 60) ? bin2bcd(sec) : 0xff;
spin_lock_irq(&rtc_lock);
@@ -491,12 +528,21 @@ cmos_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
if (unlikely(off >= attr->size))
return 0;
+ if (unlikely(off < 0))
+ return -EINVAL;
if ((off + count) > attr->size)
count = attr->size - off;
+ off += NVRAM_OFFSET;
spin_lock_irq(&rtc_lock);
- for (retval = 0, off += NVRAM_OFFSET; count--; retval++, off++)
- *buf++ = CMOS_READ(off);
+ for (retval = 0; count; count--, off++, retval++) {
+ if (off < 128)
+ *buf++ = CMOS_READ(off);
+ else if (can_bank2)
+ *buf++ = cmos_read_bank2(off);
+ else
+ break;
+ }
spin_unlock_irq(&rtc_lock);
return retval;
@@ -512,6 +558,8 @@ cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
cmos = dev_get_drvdata(container_of(kobj, struct device, kobj));
if (unlikely(off >= attr->size))
return -EFBIG;
+ if (unlikely(off < 0))
+ return -EINVAL;
if ((off + count) > attr->size)
count = attr->size - off;
@@ -520,15 +568,20 @@ cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
* here. If userspace is smart enough to know what fields of
* NVRAM to update, updating checksums is also part of its job.
*/
+ off += NVRAM_OFFSET;
spin_lock_irq(&rtc_lock);
- for (retval = 0, off += NVRAM_OFFSET; count--; retval++, off++) {
+ for (retval = 0; count; count--, off++, retval++) {
/* don't trash RTC registers */
if (off == cmos->day_alrm
|| off == cmos->mon_alrm
|| off == cmos->century)
buf++;
- else
+ else if (off < 128)
CMOS_WRITE(*buf++, off);
+ else if (can_bank2)
+ cmos_write_bank2(*buf++, off);
+ else
+ break;
}
spin_unlock_irq(&rtc_lock);
@@ -539,7 +592,6 @@ static struct bin_attribute nvram = {
.attr = {
.name = "nvram",
.mode = S_IRUGO | S_IWUSR,
- .owner = THIS_MODULE,
},
.read = cmos_nvram_read,
@@ -631,8 +683,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
/* Heuristic to deduce NVRAM size ... do what the legacy NVRAM
* driver did, but don't reject unknown configs. Old hardware
- * won't address 128 bytes, and for now we ignore the way newer
- * chips can address 256 bytes (using two more i/o ports).
+ * won't address 128 bytes. Newer chips have multiple banks,
+ * though they may not be listed in one I/O resource.
*/
#if defined(CONFIG_ATARI)
address_space = 64;
@@ -642,6 +694,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
#warning Assuming 128 bytes of RTC+NVRAM address space, not 64 bytes.
address_space = 128;
#endif
+ if (can_bank2 && ports->end > (ports->start + 1))
+ address_space = 256;
/* For ACPI systems extension info comes from the FADT. On others,
* board specific setup provides it as appropriate. Systems where
@@ -740,7 +794,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
goto cleanup2;
}
- pr_info("%s: alarms up to one %s%s%s\n",
+ pr_info("%s: alarms up to one %s%s, %zd bytes nvram%s\n",
cmos_rtc.rtc->dev.bus_id,
is_valid_irq(rtc_irq)
? (cmos_rtc.mon_alrm
@@ -749,6 +803,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
? "month" : "day"))
: "no",
cmos_rtc.century ? ", y3k" : "",
+ nvram.size,
is_hpet_enabled() ? ", hpet irqs" : "");
return 0;
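
The bank-2 helpers added above give the NVRAM loops a uniform address space: offsets below 128 go through the classic CMOS_READ() index/data pair, higher offsets through RTC_PORT(2)/(3). A condensed sketch of the dispatch (the helper name is assumed; rtc_lock must be held, as in the real loops):

    /* illustrative helper; caller holds rtc_lock as in cmos_nvram_read() */
    static unsigned char nvram_peek(unsigned off)  /* off includes NVRAM_OFFSET */
    {
        if (off < 128)
            return CMOS_READ(off);           /* bank 1: RTC_PORT(0)/(1) */
        if (can_bank2)
            return cmos_read_bank2(off);     /* bank 2: RTC_PORT(2)/(3) */
        return 0;                            /* no second bank on this machine */
    }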
diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c
index 0b17770b032..9a234a4ec06 100644
--- a/drivers/rtc/rtc-ds1216.c
+++ b/drivers/rtc/rtc-ds1216.c
@@ -86,19 +86,19 @@ static int ds1216_rtc_read_time(struct device *dev, struct rtc_time *tm)
ds1216_switch_ds_to_clock(priv->ioaddr);
ds1216_read(priv->ioaddr, (u8 *)&regs);
- tm->tm_sec = BCD2BIN(regs.sec);
- tm->tm_min = BCD2BIN(regs.min);
+ tm->tm_sec = bcd2bin(regs.sec);
+ tm->tm_min = bcd2bin(regs.min);
if (regs.hour & DS1216_HOUR_1224) {
/* AM/PM mode */
- tm->tm_hour = BCD2BIN(regs.hour & 0x1f);
+ tm->tm_hour = bcd2bin(regs.hour & 0x1f);
if (regs.hour & DS1216_HOUR_AMPM)
tm->tm_hour += 12;
} else
- tm->tm_hour = BCD2BIN(regs.hour & 0x3f);
+ tm->tm_hour = bcd2bin(regs.hour & 0x3f);
tm->tm_wday = (regs.wday & 7) - 1;
- tm->tm_mday = BCD2BIN(regs.mday & 0x3f);
- tm->tm_mon = BCD2BIN(regs.month & 0x1f);
- tm->tm_year = BCD2BIN(regs.year);
+ tm->tm_mday = bcd2bin(regs.mday & 0x3f);
+ tm->tm_mon = bcd2bin(regs.month & 0x1f);
+ tm->tm_year = bcd2bin(regs.year);
if (tm->tm_year < 70)
tm->tm_year += 100;
return 0;
@@ -114,19 +114,19 @@ static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm)
ds1216_read(priv->ioaddr, (u8 *)&regs);
regs.tsec = 0; /* clear 0.1 and 0.01 seconds */
- regs.sec = BIN2BCD(tm->tm_sec);
- regs.min = BIN2BCD(tm->tm_min);
+ regs.sec = bin2bcd(tm->tm_sec);
+ regs.min = bin2bcd(tm->tm_min);
regs.hour &= DS1216_HOUR_1224;
if (regs.hour && tm->tm_hour > 12) {
regs.hour |= DS1216_HOUR_AMPM;
tm->tm_hour -= 12;
}
- regs.hour |= BIN2BCD(tm->tm_hour);
+ regs.hour |= bin2bcd(tm->tm_hour);
regs.wday &= ~7;
regs.wday |= tm->tm_wday;
- regs.mday = BIN2BCD(tm->tm_mday);
- regs.month = BIN2BCD(tm->tm_mon);
- regs.year = BIN2BCD(tm->tm_year % 100);
+ regs.mday = bin2bcd(tm->tm_mday);
+ regs.month = bin2bcd(tm->tm_mon);
+ regs.year = bin2bcd(tm->tm_year % 100);
ds1216_switch_ds_to_clock(priv->ioaddr);
ds1216_write(priv->ioaddr, (u8 *)&regs);
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index 8f4e96bb229..18455662077 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -107,13 +107,13 @@ static int ds1302_rtc_read_time(struct device *dev, struct rtc_time *tm)
spin_lock_irq(&rtc->lock);
- tm->tm_sec = BCD2BIN(ds1302_readbyte(RTC_ADDR_SEC));
- tm->tm_min = BCD2BIN(ds1302_readbyte(RTC_ADDR_MIN));
- tm->tm_hour = BCD2BIN(ds1302_readbyte(RTC_ADDR_HOUR));
- tm->tm_wday = BCD2BIN(ds1302_readbyte(RTC_ADDR_DAY));
- tm->tm_mday = BCD2BIN(ds1302_readbyte(RTC_ADDR_DATE));
- tm->tm_mon = BCD2BIN(ds1302_readbyte(RTC_ADDR_MON)) - 1;
- tm->tm_year = BCD2BIN(ds1302_readbyte(RTC_ADDR_YEAR));
+ tm->tm_sec = bcd2bin(ds1302_readbyte(RTC_ADDR_SEC));
+ tm->tm_min = bcd2bin(ds1302_readbyte(RTC_ADDR_MIN));
+ tm->tm_hour = bcd2bin(ds1302_readbyte(RTC_ADDR_HOUR));
+ tm->tm_wday = bcd2bin(ds1302_readbyte(RTC_ADDR_DAY));
+ tm->tm_mday = bcd2bin(ds1302_readbyte(RTC_ADDR_DATE));
+ tm->tm_mon = bcd2bin(ds1302_readbyte(RTC_ADDR_MON)) - 1;
+ tm->tm_year = bcd2bin(ds1302_readbyte(RTC_ADDR_YEAR));
if (tm->tm_year < 70)
tm->tm_year += 100;
@@ -141,13 +141,13 @@ static int ds1302_rtc_set_time(struct device *dev, struct rtc_time *tm)
/* Stop RTC */
ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) | 0x80);
- ds1302_writebyte(RTC_ADDR_SEC, BIN2BCD(tm->tm_sec));
- ds1302_writebyte(RTC_ADDR_MIN, BIN2BCD(tm->tm_min));
- ds1302_writebyte(RTC_ADDR_HOUR, BIN2BCD(tm->tm_hour));
- ds1302_writebyte(RTC_ADDR_DAY, BIN2BCD(tm->tm_wday));
- ds1302_writebyte(RTC_ADDR_DATE, BIN2BCD(tm->tm_mday));
- ds1302_writebyte(RTC_ADDR_MON, BIN2BCD(tm->tm_mon + 1));
- ds1302_writebyte(RTC_ADDR_YEAR, BIN2BCD(tm->tm_year % 100));
+ ds1302_writebyte(RTC_ADDR_SEC, bin2bcd(tm->tm_sec));
+ ds1302_writebyte(RTC_ADDR_MIN, bin2bcd(tm->tm_min));
+ ds1302_writebyte(RTC_ADDR_HOUR, bin2bcd(tm->tm_hour));
+ ds1302_writebyte(RTC_ADDR_DAY, bin2bcd(tm->tm_wday));
+ ds1302_writebyte(RTC_ADDR_DATE, bin2bcd(tm->tm_mday));
+ ds1302_writebyte(RTC_ADDR_MON, bin2bcd(tm->tm_mon + 1));
+ ds1302_writebyte(RTC_ADDR_YEAR, bin2bcd(tm->tm_year % 100));
/* Start RTC */
ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) & ~0x80);
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index b91d02a3ace..fc372df6534 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -114,10 +114,10 @@ static unsigned bcd2hour(u8 bcd)
hour = 12;
bcd &= ~DS1305_HR_PM;
}
- hour += BCD2BIN(bcd);
+ hour += bcd2bin(bcd);
return hour - 1;
}
- return BCD2BIN(bcd);
+ return bcd2bin(bcd);
}
static u8 hour2bcd(bool hr12, int hour)
@@ -125,11 +125,11 @@ static u8 hour2bcd(bool hr12, int hour)
if (hr12) {
hour++;
if (hour <= 12)
- return DS1305_HR_12 | BIN2BCD(hour);
+ return DS1305_HR_12 | bin2bcd(hour);
hour -= 12;
- return DS1305_HR_12 | DS1305_HR_PM | BIN2BCD(hour);
+ return DS1305_HR_12 | DS1305_HR_PM | bin2bcd(hour);
}
- return BIN2BCD(hour);
+ return bin2bcd(hour);
}
/*----------------------------------------------------------------------*/
@@ -206,13 +206,13 @@ static int ds1305_get_time(struct device *dev, struct rtc_time *time)
buf[4], buf[5], buf[6]);
/* Decode the registers */
- time->tm_sec = BCD2BIN(buf[DS1305_SEC]);
- time->tm_min = BCD2BIN(buf[DS1305_MIN]);
+ time->tm_sec = bcd2bin(buf[DS1305_SEC]);
+ time->tm_min = bcd2bin(buf[DS1305_MIN]);
time->tm_hour = bcd2hour(buf[DS1305_HOUR]);
time->tm_wday = buf[DS1305_WDAY] - 1;
- time->tm_mday = BCD2BIN(buf[DS1305_MDAY]);
- time->tm_mon = BCD2BIN(buf[DS1305_MON]) - 1;
- time->tm_year = BCD2BIN(buf[DS1305_YEAR]) + 100;
+ time->tm_mday = bcd2bin(buf[DS1305_MDAY]);
+ time->tm_mon = bcd2bin(buf[DS1305_MON]) - 1;
+ time->tm_year = bcd2bin(buf[DS1305_YEAR]) + 100;
dev_vdbg(dev, "%s secs=%d, mins=%d, "
"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -239,13 +239,13 @@ static int ds1305_set_time(struct device *dev, struct rtc_time *time)
/* Write registers starting at the first time/date address. */
*bp++ = DS1305_WRITE | DS1305_SEC;
- *bp++ = BIN2BCD(time->tm_sec);
- *bp++ = BIN2BCD(time->tm_min);
+ *bp++ = bin2bcd(time->tm_sec);
+ *bp++ = bin2bcd(time->tm_min);
*bp++ = hour2bcd(ds1305->hr12, time->tm_hour);
*bp++ = (time->tm_wday < 7) ? (time->tm_wday + 1) : 1;
- *bp++ = BIN2BCD(time->tm_mday);
- *bp++ = BIN2BCD(time->tm_mon + 1);
- *bp++ = BIN2BCD(time->tm_year - 100);
+ *bp++ = bin2bcd(time->tm_mday);
+ *bp++ = bin2bcd(time->tm_mon + 1);
+ *bp++ = bin2bcd(time->tm_year - 100);
dev_dbg(dev, "%s: %02x %02x %02x, %02x %02x %02x %02x\n",
"write", buf[1], buf[2], buf[3],
@@ -329,8 +329,8 @@ static int ds1305_get_alarm(struct device *dev, struct rtc_wkalrm *alm)
* fill in the rest ... and also handle rollover to tomorrow when
* that's needed.
*/
- alm->time.tm_sec = BCD2BIN(buf[DS1305_SEC]);
- alm->time.tm_min = BCD2BIN(buf[DS1305_MIN]);
+ alm->time.tm_sec = bcd2bin(buf[DS1305_SEC]);
+ alm->time.tm_min = bcd2bin(buf[DS1305_MIN]);
alm->time.tm_hour = bcd2hour(buf[DS1305_HOUR]);
alm->time.tm_mday = -1;
alm->time.tm_mon = -1;
@@ -387,8 +387,8 @@ static int ds1305_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
/* write alarm */
buf[0] = DS1305_WRITE | DS1305_ALM0(DS1305_SEC);
- buf[1 + DS1305_SEC] = BIN2BCD(alm->time.tm_sec);
- buf[1 + DS1305_MIN] = BIN2BCD(alm->time.tm_min);
+ buf[1 + DS1305_SEC] = bin2bcd(alm->time.tm_sec);
+ buf[1 + DS1305_MIN] = bin2bcd(alm->time.tm_min);
buf[1 + DS1305_HOUR] = hour2bcd(ds1305->hr12, alm->time.tm_hour);
buf[1 + DS1305_WDAY] = DS1305_ALM_DISABLE;
@@ -606,7 +606,6 @@ ds1305_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
static struct bin_attribute nvram = {
.attr.name = "nvram",
.attr.mode = S_IRUGO | S_IWUSR,
- .attr.owner = THIS_MODULE,
.read = ds1305_nvram_read,
.write = ds1305_nvram_write,
.size = DS1305_NVRAM_LEN,
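The deleted .attr.owner = THIS_MODULE initializers (this driver and several below carry the same change) follow from struct attribute losing its owner field once sysfs gained its own lifetime handling for attribute files; the assignment is dead weight. The registration calls are untouched. Roughly, with the pairing assumed for illustration:

/* Illustrative probe()/remove() pairing; sysfs handles module
 * lifetime when the file is created, so no .owner bookkeeping. */
int err = sysfs_create_bin_file(&dev->kobj, &nvram);	/* probe  */
sysfs_remove_bin_file(&dev->kobj, &nvram);		/* remove */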
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 4fcf0734a6e..162330b9d1d 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -222,17 +222,17 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
ds1307->regs[4], ds1307->regs[5],
ds1307->regs[6]);
- t->tm_sec = BCD2BIN(ds1307->regs[DS1307_REG_SECS] & 0x7f);
- t->tm_min = BCD2BIN(ds1307->regs[DS1307_REG_MIN] & 0x7f);
+ t->tm_sec = bcd2bin(ds1307->regs[DS1307_REG_SECS] & 0x7f);
+ t->tm_min = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
tmp = ds1307->regs[DS1307_REG_HOUR] & 0x3f;
- t->tm_hour = BCD2BIN(tmp);
- t->tm_wday = BCD2BIN(ds1307->regs[DS1307_REG_WDAY] & 0x07) - 1;
- t->tm_mday = BCD2BIN(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
+ t->tm_hour = bcd2bin(tmp);
+ t->tm_wday = bcd2bin(ds1307->regs[DS1307_REG_WDAY] & 0x07) - 1;
+ t->tm_mday = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
tmp = ds1307->regs[DS1307_REG_MONTH] & 0x1f;
- t->tm_mon = BCD2BIN(tmp) - 1;
+ t->tm_mon = bcd2bin(tmp) - 1;
/* assume 20YY not 19YY, and ignore DS1337_BIT_CENTURY */
- t->tm_year = BCD2BIN(ds1307->regs[DS1307_REG_YEAR]) + 100;
+ t->tm_year = bcd2bin(ds1307->regs[DS1307_REG_YEAR]) + 100;
dev_dbg(dev, "%s secs=%d, mins=%d, "
"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -258,16 +258,16 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
t->tm_mon, t->tm_year, t->tm_wday);
*buf++ = 0; /* first register addr */
- buf[DS1307_REG_SECS] = BIN2BCD(t->tm_sec);
- buf[DS1307_REG_MIN] = BIN2BCD(t->tm_min);
- buf[DS1307_REG_HOUR] = BIN2BCD(t->tm_hour);
- buf[DS1307_REG_WDAY] = BIN2BCD(t->tm_wday + 1);
- buf[DS1307_REG_MDAY] = BIN2BCD(t->tm_mday);
- buf[DS1307_REG_MONTH] = BIN2BCD(t->tm_mon + 1);
+ buf[DS1307_REG_SECS] = bin2bcd(t->tm_sec);
+ buf[DS1307_REG_MIN] = bin2bcd(t->tm_min);
+ buf[DS1307_REG_HOUR] = bin2bcd(t->tm_hour);
+ buf[DS1307_REG_WDAY] = bin2bcd(t->tm_wday + 1);
+ buf[DS1307_REG_MDAY] = bin2bcd(t->tm_mday);
+ buf[DS1307_REG_MONTH] = bin2bcd(t->tm_mon + 1);
/* assume 20YY not 19YY */
tmp = t->tm_year - 100;
- buf[DS1307_REG_YEAR] = BIN2BCD(tmp);
+ buf[DS1307_REG_YEAR] = bin2bcd(tmp);
switch (ds1307->type) {
case ds_1337:
@@ -551,7 +551,6 @@ static struct bin_attribute nvram = {
.attr = {
.name = "nvram",
.mode = S_IRUGO | S_IWUSR,
- .owner = THIS_MODULE,
},
.read = ds1307_nvram_read,
@@ -709,18 +708,18 @@ read_rtc:
}
tmp = ds1307->regs[DS1307_REG_SECS];
- tmp = BCD2BIN(tmp & 0x7f);
+ tmp = bcd2bin(tmp & 0x7f);
if (tmp > 60)
goto exit_bad;
- tmp = BCD2BIN(ds1307->regs[DS1307_REG_MIN] & 0x7f);
+ tmp = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
if (tmp > 60)
goto exit_bad;
- tmp = BCD2BIN(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
+ tmp = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
if (tmp == 0 || tmp > 31)
goto exit_bad;
- tmp = BCD2BIN(ds1307->regs[DS1307_REG_MONTH] & 0x1f);
+ tmp = bcd2bin(ds1307->regs[DS1307_REG_MONTH] & 0x1f);
if (tmp == 0 || tmp > 12)
goto exit_bad;
@@ -739,14 +738,14 @@ read_rtc:
/* Be sure we're in 24 hour mode. Multi-master systems
* take note...
*/
- tmp = BCD2BIN(tmp & 0x1f);
+ tmp = bcd2bin(tmp & 0x1f);
if (tmp == 12)
tmp = 0;
if (ds1307->regs[DS1307_REG_HOUR] & DS1307_BIT_PM)
tmp += 12;
i2c_smbus_write_byte_data(client,
DS1307_REG_HOUR,
- BIN2BCD(tmp));
+ bin2bcd(tmp));
}
ds1307->rtc = rtc_device_register(client->name, &client->dev,
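One pattern worth calling out in these read paths: every register is masked before conversion (seconds with 0x7f, hours with 0x3f, months with 0x1f) because the top bits are control flags, not time digits; on the DS1307, for instance, bit 7 of the seconds register is the clock-halt bit. A small illustrative decoder:

/* Illustrative only: decode a DS1307-style seconds register, where
 * bit 7 is the clock-halt flag and bits 6..0 are BCD seconds. */
static int decode_seconds(unsigned char reg, int *halted)
{
	*halted = !!(reg & 0x80);	/* non-zero: oscillator stopped */
	return bcd2bin(reg & 0x7f);	/* e.g. 0xd9 -> halted, 59 s */
}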
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 86981d34fbb..25caada7839 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -153,8 +153,8 @@ ds1511_wdog_set(unsigned long deciseconds)
/*
* set the wdog values in the wdog registers
*/
- rtc_write(BIN2BCD(deciseconds % 100), DS1511_WD_MSEC);
- rtc_write(BIN2BCD(deciseconds / 100), DS1511_WD_SEC);
+ rtc_write(bin2bcd(deciseconds % 100), DS1511_WD_MSEC);
+ rtc_write(bin2bcd(deciseconds / 100), DS1511_WD_SEC);
/*
* set wdog enable and wdog 'steering' bit to issue a reset
*/
@@ -220,13 +220,13 @@ static int ds1511_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm)
/*
* each register is a different number of valid bits
*/
- sec = BIN2BCD(sec) & 0x7f;
- min = BIN2BCD(min) & 0x7f;
- hrs = BIN2BCD(hrs) & 0x3f;
- day = BIN2BCD(day) & 0x3f;
- mon = BIN2BCD(mon) & 0x1f;
- yrs = BIN2BCD(yrs) & 0xff;
- cen = BIN2BCD(cen) & 0xff;
+ sec = bin2bcd(sec) & 0x7f;
+ min = bin2bcd(min) & 0x7f;
+ hrs = bin2bcd(hrs) & 0x3f;
+ day = bin2bcd(day) & 0x3f;
+ mon = bin2bcd(mon) & 0x1f;
+ yrs = bin2bcd(yrs) & 0xff;
+ cen = bin2bcd(cen) & 0xff;
spin_lock_irqsave(&ds1511_lock, flags);
rtc_disable_update();
@@ -264,14 +264,14 @@ static int ds1511_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
rtc_enable_update();
spin_unlock_irqrestore(&ds1511_lock, flags);
- rtc_tm->tm_sec = BCD2BIN(rtc_tm->tm_sec);
- rtc_tm->tm_min = BCD2BIN(rtc_tm->tm_min);
- rtc_tm->tm_hour = BCD2BIN(rtc_tm->tm_hour);
- rtc_tm->tm_mday = BCD2BIN(rtc_tm->tm_mday);
- rtc_tm->tm_wday = BCD2BIN(rtc_tm->tm_wday);
- rtc_tm->tm_mon = BCD2BIN(rtc_tm->tm_mon);
- rtc_tm->tm_year = BCD2BIN(rtc_tm->tm_year);
- century = BCD2BIN(century) * 100;
+ rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+ rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+ rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+ rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+ rtc_tm->tm_wday = bcd2bin(rtc_tm->tm_wday);
+ rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+ rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
+ century = bcd2bin(century) * 100;
/*
* Account for differences between how the RTC uses the values
@@ -304,16 +304,16 @@ ds1511_rtc_update_alarm(struct rtc_plat_data *pdata)
spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
rtc_write(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_mday) & 0x3f,
+ 0x80 : bin2bcd(pdata->alrm_mday) & 0x3f,
RTC_ALARM_DATE);
rtc_write(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_hour) & 0x3f,
+ 0x80 : bin2bcd(pdata->alrm_hour) & 0x3f,
RTC_ALARM_HOUR);
rtc_write(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_min) & 0x7f,
+ 0x80 : bin2bcd(pdata->alrm_min) & 0x7f,
RTC_ALARM_MIN);
rtc_write(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_sec) & 0x7f,
+ 0x80 : bin2bcd(pdata->alrm_sec) & 0x7f,
RTC_ALARM_SEC);
rtc_write(rtc_read(RTC_CMD) | (pdata->irqen ? RTC_TIE : 0), RTC_CMD);
rtc_read(RTC_CMD1); /* clear interrupts */
@@ -481,7 +481,6 @@ static struct bin_attribute ds1511_nvram_attr = {
.attr = {
.name = "nvram",
.mode = S_IRUGO | S_IWUGO,
- .owner = THIS_MODULE,
},
.size = DS1511_RAM_MAX,
.read = ds1511_nvram_read,
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 4ef59285b48..b9475cd2021 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -78,17 +78,17 @@ static int ds1553_rtc_set_time(struct device *dev, struct rtc_time *tm)
void __iomem *ioaddr = pdata->ioaddr;
u8 century;
- century = BIN2BCD((tm->tm_year + 1900) / 100);
+ century = bin2bcd((tm->tm_year + 1900) / 100);
writeb(RTC_WRITE, pdata->ioaddr + RTC_CONTROL);
- writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
- writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
- writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
- writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
- writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
- writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
- writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+ writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+ writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+ writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+ writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+ writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+ writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+ writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
/* RTC_CENTURY and RTC_CONTROL share same register */
writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY);
@@ -118,14 +118,14 @@ static int ds1553_rtc_read_time(struct device *dev, struct rtc_time *tm)
year = readb(ioaddr + RTC_YEAR);
century = readb(ioaddr + RTC_CENTURY) & RTC_CENTURY_MASK;
writeb(0, ioaddr + RTC_CONTROL);
- tm->tm_sec = BCD2BIN(second);
- tm->tm_min = BCD2BIN(minute);
- tm->tm_hour = BCD2BIN(hour);
- tm->tm_mday = BCD2BIN(day);
- tm->tm_wday = BCD2BIN(week);
- tm->tm_mon = BCD2BIN(month) - 1;
+ tm->tm_sec = bcd2bin(second);
+ tm->tm_min = bcd2bin(minute);
+ tm->tm_hour = bcd2bin(hour);
+ tm->tm_mday = bcd2bin(day);
+ tm->tm_wday = bcd2bin(week);
+ tm->tm_mon = bcd2bin(month) - 1;
/* year is 1900 + tm->tm_year */
- tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+ tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
if (rtc_valid_tm(tm) < 0) {
dev_err(dev, "retrieved date/time is not valid.\n");
@@ -141,16 +141,16 @@ static void ds1553_rtc_update_alarm(struct rtc_plat_data *pdata)
spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_mday),
+ 0x80 : bin2bcd(pdata->alrm_mday),
ioaddr + RTC_DATE_ALARM);
writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_hour),
+ 0x80 : bin2bcd(pdata->alrm_hour),
ioaddr + RTC_HOURS_ALARM);
writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_min),
+ 0x80 : bin2bcd(pdata->alrm_min),
ioaddr + RTC_MINUTES_ALARM);
writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_sec),
+ 0x80 : bin2bcd(pdata->alrm_sec),
ioaddr + RTC_SECONDS_ALARM);
writeb(pdata->irqen ? RTC_INTS_AE : 0, ioaddr + RTC_INTERRUPTS);
readb(ioaddr + RTC_FLAGS); /* clear interrupts */
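The ds1511 and ds1553 alarm updates above share an idiom: a field that should match any value, either because it is unset (< 0) or because the periodic update interrupt rather than the alarm is armed, is written as 0x80 (alarm-mask bit set) instead of a BCD time. Condensed into a helper whose name is made up for the example:

/* Sketch: encode one alarm field for an RTC that treats a set
 * bit 7 as "don't care / match every value". */
static unsigned char alarm_field(int value, int wildcard)
{
	if (wildcard || value < 0)
		return 0x80;		/* match-any */
	return bin2bcd(value);		/* exact BCD match */
}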
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 24d35ede2db..8bc8501bffc 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -66,17 +66,17 @@ static int ds1742_rtc_set_time(struct device *dev, struct rtc_time *tm)
void __iomem *ioaddr = pdata->ioaddr_rtc;
u8 century;
- century = BIN2BCD((tm->tm_year + 1900) / 100);
+ century = bin2bcd((tm->tm_year + 1900) / 100);
writeb(RTC_WRITE, ioaddr + RTC_CONTROL);
- writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
- writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
- writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
- writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
- writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
- writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
- writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+ writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+ writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+ writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+ writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+ writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+ writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+ writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
/* RTC_CENTURY and RTC_CONTROL share same register */
writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY);
@@ -106,14 +106,14 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm)
year = readb(ioaddr + RTC_YEAR);
century = readb(ioaddr + RTC_CENTURY) & RTC_CENTURY_MASK;
writeb(0, ioaddr + RTC_CONTROL);
- tm->tm_sec = BCD2BIN(second);
- tm->tm_min = BCD2BIN(minute);
- tm->tm_hour = BCD2BIN(hour);
- tm->tm_mday = BCD2BIN(day);
- tm->tm_wday = BCD2BIN(week);
- tm->tm_mon = BCD2BIN(month) - 1;
+ tm->tm_sec = bcd2bin(second);
+ tm->tm_min = bcd2bin(minute);
+ tm->tm_hour = bcd2bin(hour);
+ tm->tm_mday = bcd2bin(day);
+ tm->tm_wday = bcd2bin(week);
+ tm->tm_mon = bcd2bin(month) - 1;
/* year is 1900 + tm->tm_year */
- tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+ tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
if (rtc_valid_tm(tm) < 0) {
dev_err(dev, "retrieved date/time is not valid.\n");
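The ds1553/ds1742 read paths rebuild the calendar year from a separate century register, and since tm_year counts from 1900 the arithmetic deserves one worked example:

/* Example of the ds1553/ds1742 year math: century 0x20 and year
 * 0x08 in BCD describe 2008, so tm_year comes out as 108. */
static int regs_to_tm_year(unsigned char year, unsigned char century)
{
	return bcd2bin(year) + bcd2bin(century) * 100 - 1900;
}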
diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c
index abfdfcbaa05..3a7be11cc6b 100644
--- a/drivers/rtc/rtc-fm3130.c
+++ b/drivers/rtc/rtc-fm3130.c
@@ -131,17 +131,17 @@ static int fm3130_get_time(struct device *dev, struct rtc_time *t)
fm3130->regs[0xc], fm3130->regs[0xd],
fm3130->regs[0xe]);
- t->tm_sec = BCD2BIN(fm3130->regs[FM3130_RTC_SECONDS] & 0x7f);
- t->tm_min = BCD2BIN(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
+ t->tm_sec = bcd2bin(fm3130->regs[FM3130_RTC_SECONDS] & 0x7f);
+ t->tm_min = bcd2bin(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
tmp = fm3130->regs[FM3130_RTC_HOURS] & 0x3f;
- t->tm_hour = BCD2BIN(tmp);
- t->tm_wday = BCD2BIN(fm3130->regs[FM3130_RTC_DAY] & 0x07) - 1;
- t->tm_mday = BCD2BIN(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
+ t->tm_hour = bcd2bin(tmp);
+ t->tm_wday = bcd2bin(fm3130->regs[FM3130_RTC_DAY] & 0x07) - 1;
+ t->tm_mday = bcd2bin(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
tmp = fm3130->regs[FM3130_RTC_MONTHS] & 0x1f;
- t->tm_mon = BCD2BIN(tmp) - 1;
+ t->tm_mon = bcd2bin(tmp) - 1;
/* assume 20YY not 19YY, and ignore CF bit */
- t->tm_year = BCD2BIN(fm3130->regs[FM3130_RTC_YEARS]) + 100;
+ t->tm_year = bcd2bin(fm3130->regs[FM3130_RTC_YEARS]) + 100;
dev_dbg(dev, "%s secs=%d, mins=%d, "
"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -167,16 +167,16 @@ static int fm3130_set_time(struct device *dev, struct rtc_time *t)
t->tm_mon, t->tm_year, t->tm_wday);
/* first register addr */
- buf[FM3130_RTC_SECONDS] = BIN2BCD(t->tm_sec);
- buf[FM3130_RTC_MINUTES] = BIN2BCD(t->tm_min);
- buf[FM3130_RTC_HOURS] = BIN2BCD(t->tm_hour);
- buf[FM3130_RTC_DAY] = BIN2BCD(t->tm_wday + 1);
- buf[FM3130_RTC_DATE] = BIN2BCD(t->tm_mday);
- buf[FM3130_RTC_MONTHS] = BIN2BCD(t->tm_mon + 1);
+ buf[FM3130_RTC_SECONDS] = bin2bcd(t->tm_sec);
+ buf[FM3130_RTC_MINUTES] = bin2bcd(t->tm_min);
+ buf[FM3130_RTC_HOURS] = bin2bcd(t->tm_hour);
+ buf[FM3130_RTC_DAY] = bin2bcd(t->tm_wday + 1);
+ buf[FM3130_RTC_DATE] = bin2bcd(t->tm_mday);
+ buf[FM3130_RTC_MONTHS] = bin2bcd(t->tm_mon + 1);
/* assume 20YY not 19YY */
tmp = t->tm_year - 100;
- buf[FM3130_RTC_YEARS] = BIN2BCD(tmp);
+ buf[FM3130_RTC_YEARS] = bin2bcd(tmp);
dev_dbg(dev, "%s: %02x %02x %02x %02x %02x %02x %02x"
"%02x %02x %02x %02x %02x %02x %02x %02x\n",
@@ -222,11 +222,11 @@ static int fm3130_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
fm3130->regs[FM3130_ALARM_MONTHS]);
- tm->tm_sec = BCD2BIN(fm3130->regs[FM3130_ALARM_SECONDS] & 0x7F);
- tm->tm_min = BCD2BIN(fm3130->regs[FM3130_ALARM_MINUTES] & 0x7F);
- tm->tm_hour = BCD2BIN(fm3130->regs[FM3130_ALARM_HOURS] & 0x3F);
- tm->tm_mday = BCD2BIN(fm3130->regs[FM3130_ALARM_DATE] & 0x3F);
- tm->tm_mon = BCD2BIN(fm3130->regs[FM3130_ALARM_MONTHS] & 0x1F);
+ tm->tm_sec = bcd2bin(fm3130->regs[FM3130_ALARM_SECONDS] & 0x7F);
+ tm->tm_min = bcd2bin(fm3130->regs[FM3130_ALARM_MINUTES] & 0x7F);
+ tm->tm_hour = bcd2bin(fm3130->regs[FM3130_ALARM_HOURS] & 0x3F);
+ tm->tm_mday = bcd2bin(fm3130->regs[FM3130_ALARM_DATE] & 0x3F);
+ tm->tm_mon = bcd2bin(fm3130->regs[FM3130_ALARM_MONTHS] & 0x1F);
if (tm->tm_mon > 0)
tm->tm_mon -= 1; /* RTC is 1-12, tm_mon is 0-11 */
dev_dbg(dev, "%s secs=%d, mins=%d, "
@@ -252,23 +252,23 @@ static int fm3130_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
if (tm->tm_sec != -1)
fm3130->regs[FM3130_ALARM_SECONDS] =
- BIN2BCD(tm->tm_sec) | 0x80;
+ bin2bcd(tm->tm_sec) | 0x80;
if (tm->tm_min != -1)
fm3130->regs[FM3130_ALARM_MINUTES] =
- BIN2BCD(tm->tm_min) | 0x80;
+ bin2bcd(tm->tm_min) | 0x80;
if (tm->tm_hour != -1)
fm3130->regs[FM3130_ALARM_HOURS] =
- BIN2BCD(tm->tm_hour) | 0x80;
+ bin2bcd(tm->tm_hour) | 0x80;
if (tm->tm_mday != -1)
fm3130->regs[FM3130_ALARM_DATE] =
- BIN2BCD(tm->tm_mday) | 0x80;
+ bin2bcd(tm->tm_mday) | 0x80;
if (tm->tm_mon != -1)
fm3130->regs[FM3130_ALARM_MONTHS] =
- BIN2BCD(tm->tm_mon + 1) | 0x80;
+ bin2bcd(tm->tm_mon + 1) | 0x80;
dev_dbg(dev, "alarm write %02x %02x %02x %02x %02x\n",
fm3130->regs[FM3130_ALARM_SECONDS],
@@ -414,18 +414,18 @@ static int __devinit fm3130_probe(struct i2c_client *client,
/* TODO */
/* TODO need to sanity check alarm */
tmp = fm3130->regs[FM3130_RTC_SECONDS];
- tmp = BCD2BIN(tmp & 0x7f);
+ tmp = bcd2bin(tmp & 0x7f);
if (tmp > 60)
goto exit_bad;
- tmp = BCD2BIN(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
+ tmp = bcd2bin(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
if (tmp > 60)
goto exit_bad;
- tmp = BCD2BIN(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
+ tmp = bcd2bin(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
if (tmp == 0 || tmp > 31)
goto exit_bad;
- tmp = BCD2BIN(fm3130->regs[FM3130_RTC_MONTHS] & 0x1f);
+ tmp = bcd2bin(fm3130->regs[FM3130_RTC_MONTHS] & 0x1f);
if (tmp == 0 || tmp > 12)
goto exit_bad;
diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index a81adab6e51..2cd77ab8fc6 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -259,26 +259,26 @@ isl1208_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
return sr;
}
- tm->tm_sec = BCD2BIN(regs[ISL1208_REG_SC]);
- tm->tm_min = BCD2BIN(regs[ISL1208_REG_MN]);
+ tm->tm_sec = bcd2bin(regs[ISL1208_REG_SC]);
+ tm->tm_min = bcd2bin(regs[ISL1208_REG_MN]);
/* HR field has a more complex interpretation */
{
const u8 _hr = regs[ISL1208_REG_HR];
if (_hr & ISL1208_REG_HR_MIL) /* 24h format */
- tm->tm_hour = BCD2BIN(_hr & 0x3f);
+ tm->tm_hour = bcd2bin(_hr & 0x3f);
else {
/* 12h format */
- tm->tm_hour = BCD2BIN(_hr & 0x1f);
+ tm->tm_hour = bcd2bin(_hr & 0x1f);
if (_hr & ISL1208_REG_HR_PM) /* PM flag set */
tm->tm_hour += 12;
}
}
- tm->tm_mday = BCD2BIN(regs[ISL1208_REG_DT]);
- tm->tm_mon = BCD2BIN(regs[ISL1208_REG_MO]) - 1; /* rtc starts at 1 */
- tm->tm_year = BCD2BIN(regs[ISL1208_REG_YR]) + 100;
- tm->tm_wday = BCD2BIN(regs[ISL1208_REG_DW]);
+ tm->tm_mday = bcd2bin(regs[ISL1208_REG_DT]);
+ tm->tm_mon = bcd2bin(regs[ISL1208_REG_MO]) - 1; /* rtc starts at 1 */
+ tm->tm_year = bcd2bin(regs[ISL1208_REG_YR]) + 100;
+ tm->tm_wday = bcd2bin(regs[ISL1208_REG_DW]);
return 0;
}
@@ -305,13 +305,13 @@ isl1208_i2c_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alarm)
}
/* MSB of each alarm register is an enable bit */
- tm->tm_sec = BCD2BIN(regs[ISL1208_REG_SCA - ISL1208_REG_SCA] & 0x7f);
- tm->tm_min = BCD2BIN(regs[ISL1208_REG_MNA - ISL1208_REG_SCA] & 0x7f);
- tm->tm_hour = BCD2BIN(regs[ISL1208_REG_HRA - ISL1208_REG_SCA] & 0x3f);
- tm->tm_mday = BCD2BIN(regs[ISL1208_REG_DTA - ISL1208_REG_SCA] & 0x3f);
+ tm->tm_sec = bcd2bin(regs[ISL1208_REG_SCA - ISL1208_REG_SCA] & 0x7f);
+ tm->tm_min = bcd2bin(regs[ISL1208_REG_MNA - ISL1208_REG_SCA] & 0x7f);
+ tm->tm_hour = bcd2bin(regs[ISL1208_REG_HRA - ISL1208_REG_SCA] & 0x3f);
+ tm->tm_mday = bcd2bin(regs[ISL1208_REG_DTA - ISL1208_REG_SCA] & 0x3f);
tm->tm_mon =
- BCD2BIN(regs[ISL1208_REG_MOA - ISL1208_REG_SCA] & 0x1f) - 1;
- tm->tm_wday = BCD2BIN(regs[ISL1208_REG_DWA - ISL1208_REG_SCA] & 0x03);
+ bcd2bin(regs[ISL1208_REG_MOA - ISL1208_REG_SCA] & 0x1f) - 1;
+ tm->tm_wday = bcd2bin(regs[ISL1208_REG_DWA - ISL1208_REG_SCA] & 0x03);
return 0;
}
@@ -328,15 +328,15 @@ isl1208_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
int sr;
u8 regs[ISL1208_RTC_SECTION_LEN] = { 0, };
- regs[ISL1208_REG_SC] = BIN2BCD(tm->tm_sec);
- regs[ISL1208_REG_MN] = BIN2BCD(tm->tm_min);
- regs[ISL1208_REG_HR] = BIN2BCD(tm->tm_hour) | ISL1208_REG_HR_MIL;
+ regs[ISL1208_REG_SC] = bin2bcd(tm->tm_sec);
+ regs[ISL1208_REG_MN] = bin2bcd(tm->tm_min);
+ regs[ISL1208_REG_HR] = bin2bcd(tm->tm_hour) | ISL1208_REG_HR_MIL;
- regs[ISL1208_REG_DT] = BIN2BCD(tm->tm_mday);
- regs[ISL1208_REG_MO] = BIN2BCD(tm->tm_mon + 1);
- regs[ISL1208_REG_YR] = BIN2BCD(tm->tm_year - 100);
+ regs[ISL1208_REG_DT] = bin2bcd(tm->tm_mday);
+ regs[ISL1208_REG_MO] = bin2bcd(tm->tm_mon + 1);
+ regs[ISL1208_REG_YR] = bin2bcd(tm->tm_year - 100);
- regs[ISL1208_REG_DW] = BIN2BCD(tm->tm_wday & 7);
+ regs[ISL1208_REG_DW] = bin2bcd(tm->tm_wday & 7);
sr = isl1208_i2c_get_sr(client);
if (sr < 0) {
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 470fb2d2954..893f7dece23 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -110,15 +110,15 @@ static int m41t80_get_datetime(struct i2c_client *client,
return -EIO;
}
- tm->tm_sec = BCD2BIN(buf[M41T80_REG_SEC] & 0x7f);
- tm->tm_min = BCD2BIN(buf[M41T80_REG_MIN] & 0x7f);
- tm->tm_hour = BCD2BIN(buf[M41T80_REG_HOUR] & 0x3f);
- tm->tm_mday = BCD2BIN(buf[M41T80_REG_DAY] & 0x3f);
+ tm->tm_sec = bcd2bin(buf[M41T80_REG_SEC] & 0x7f);
+ tm->tm_min = bcd2bin(buf[M41T80_REG_MIN] & 0x7f);
+ tm->tm_hour = bcd2bin(buf[M41T80_REG_HOUR] & 0x3f);
+ tm->tm_mday = bcd2bin(buf[M41T80_REG_DAY] & 0x3f);
tm->tm_wday = buf[M41T80_REG_WDAY] & 0x07;
- tm->tm_mon = BCD2BIN(buf[M41T80_REG_MON] & 0x1f) - 1;
+ tm->tm_mon = bcd2bin(buf[M41T80_REG_MON] & 0x1f) - 1;
/* assume 20YY not 19YY, and ignore the Century Bit */
- tm->tm_year = BCD2BIN(buf[M41T80_REG_YEAR]) + 100;
+ tm->tm_year = bcd2bin(buf[M41T80_REG_YEAR]) + 100;
return 0;
}
@@ -161,19 +161,19 @@ static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
/* Merge time-data and register flags into buf[0..7] */
buf[M41T80_REG_SSEC] = 0;
buf[M41T80_REG_SEC] =
- BIN2BCD(tm->tm_sec) | (buf[M41T80_REG_SEC] & ~0x7f);
+ bin2bcd(tm->tm_sec) | (buf[M41T80_REG_SEC] & ~0x7f);
buf[M41T80_REG_MIN] =
- BIN2BCD(tm->tm_min) | (buf[M41T80_REG_MIN] & ~0x7f);
+ bin2bcd(tm->tm_min) | (buf[M41T80_REG_MIN] & ~0x7f);
buf[M41T80_REG_HOUR] =
- BIN2BCD(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f) ;
+ bin2bcd(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f) ;
buf[M41T80_REG_WDAY] =
(tm->tm_wday & 0x07) | (buf[M41T80_REG_WDAY] & ~0x07);
buf[M41T80_REG_DAY] =
- BIN2BCD(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
+ bin2bcd(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
buf[M41T80_REG_MON] =
- BIN2BCD(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
+ bin2bcd(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
/* assume 20YY not 19YY */
- buf[M41T80_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+ buf[M41T80_REG_YEAR] = bin2bcd(tm->tm_year % 100);
if (i2c_transfer(client->adapter, msgs, 1) != 1) {
dev_err(&client->dev, "write error\n");
@@ -288,15 +288,15 @@ static int m41t80_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *t)
wbuf[0] = M41T80_REG_ALARM_MON; /* offset into rtc's regs */
reg[M41T80_REG_ALARM_SEC] |= t->time.tm_sec >= 0 ?
- BIN2BCD(t->time.tm_sec) : 0x80;
+ bin2bcd(t->time.tm_sec) : 0x80;
reg[M41T80_REG_ALARM_MIN] |= t->time.tm_min >= 0 ?
- BIN2BCD(t->time.tm_min) : 0x80;
+ bin2bcd(t->time.tm_min) : 0x80;
reg[M41T80_REG_ALARM_HOUR] |= t->time.tm_hour >= 0 ?
- BIN2BCD(t->time.tm_hour) : 0x80;
+ bin2bcd(t->time.tm_hour) : 0x80;
reg[M41T80_REG_ALARM_DAY] |= t->time.tm_mday >= 0 ?
- BIN2BCD(t->time.tm_mday) : 0x80;
+ bin2bcd(t->time.tm_mday) : 0x80;
if (t->time.tm_mon >= 0)
- reg[M41T80_REG_ALARM_MON] |= BIN2BCD(t->time.tm_mon + 1);
+ reg[M41T80_REG_ALARM_MON] |= bin2bcd(t->time.tm_mon + 1);
else
reg[M41T80_REG_ALARM_DAY] |= 0x40;
@@ -347,15 +347,15 @@ static int m41t80_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *t)
t->time.tm_mday = -1;
t->time.tm_mon = -1;
if (!(reg[M41T80_REG_ALARM_SEC] & 0x80))
- t->time.tm_sec = BCD2BIN(reg[M41T80_REG_ALARM_SEC] & 0x7f);
+ t->time.tm_sec = bcd2bin(reg[M41T80_REG_ALARM_SEC] & 0x7f);
if (!(reg[M41T80_REG_ALARM_MIN] & 0x80))
- t->time.tm_min = BCD2BIN(reg[M41T80_REG_ALARM_MIN] & 0x7f);
+ t->time.tm_min = bcd2bin(reg[M41T80_REG_ALARM_MIN] & 0x7f);
if (!(reg[M41T80_REG_ALARM_HOUR] & 0x80))
- t->time.tm_hour = BCD2BIN(reg[M41T80_REG_ALARM_HOUR] & 0x3f);
+ t->time.tm_hour = bcd2bin(reg[M41T80_REG_ALARM_HOUR] & 0x3f);
if (!(reg[M41T80_REG_ALARM_DAY] & 0x80))
- t->time.tm_mday = BCD2BIN(reg[M41T80_REG_ALARM_DAY] & 0x3f);
+ t->time.tm_mday = bcd2bin(reg[M41T80_REG_ALARM_DAY] & 0x3f);
if (!(reg[M41T80_REG_ALARM_DAY] & 0x40))
- t->time.tm_mon = BCD2BIN(reg[M41T80_REG_ALARM_MON] & 0x1f) - 1;
+ t->time.tm_mon = bcd2bin(reg[M41T80_REG_ALARM_MON] & 0x1f) - 1;
t->time.tm_year = -1;
t->time.tm_wday = -1;
t->time.tm_yday = -1;
diff --git a/drivers/rtc/rtc-m41t94.c b/drivers/rtc/rtc-m41t94.c
index 9b19499c829..c3a18c58daf 100644
--- a/drivers/rtc/rtc-m41t94.c
+++ b/drivers/rtc/rtc-m41t94.c
@@ -41,17 +41,17 @@ static int m41t94_set_time(struct device *dev, struct rtc_time *tm)
tm->tm_mon, tm->tm_year, tm->tm_wday);
buf[0] = 0x80 | M41T94_REG_SECONDS; /* write time + date */
- buf[M41T94_REG_SECONDS] = BIN2BCD(tm->tm_sec);
- buf[M41T94_REG_MINUTES] = BIN2BCD(tm->tm_min);
- buf[M41T94_REG_HOURS] = BIN2BCD(tm->tm_hour);
- buf[M41T94_REG_WDAY] = BIN2BCD(tm->tm_wday + 1);
- buf[M41T94_REG_DAY] = BIN2BCD(tm->tm_mday);
- buf[M41T94_REG_MONTH] = BIN2BCD(tm->tm_mon + 1);
+ buf[M41T94_REG_SECONDS] = bin2bcd(tm->tm_sec);
+ buf[M41T94_REG_MINUTES] = bin2bcd(tm->tm_min);
+ buf[M41T94_REG_HOURS] = bin2bcd(tm->tm_hour);
+ buf[M41T94_REG_WDAY] = bin2bcd(tm->tm_wday + 1);
+ buf[M41T94_REG_DAY] = bin2bcd(tm->tm_mday);
+ buf[M41T94_REG_MONTH] = bin2bcd(tm->tm_mon + 1);
buf[M41T94_REG_HOURS] |= M41T94_BIT_CEB;
if (tm->tm_year >= 100)
buf[M41T94_REG_HOURS] |= M41T94_BIT_CB;
- buf[M41T94_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+ buf[M41T94_REG_YEAR] = bin2bcd(tm->tm_year % 100);
return spi_write(spi, buf, 8);
}
@@ -82,14 +82,14 @@ static int m41t94_read_time(struct device *dev, struct rtc_time *tm)
spi_write(spi, buf, 2);
}
- tm->tm_sec = BCD2BIN(spi_w8r8(spi, M41T94_REG_SECONDS));
- tm->tm_min = BCD2BIN(spi_w8r8(spi, M41T94_REG_MINUTES));
+ tm->tm_sec = bcd2bin(spi_w8r8(spi, M41T94_REG_SECONDS));
+ tm->tm_min = bcd2bin(spi_w8r8(spi, M41T94_REG_MINUTES));
hour = spi_w8r8(spi, M41T94_REG_HOURS);
- tm->tm_hour = BCD2BIN(hour & 0x3f);
- tm->tm_wday = BCD2BIN(spi_w8r8(spi, M41T94_REG_WDAY)) - 1;
- tm->tm_mday = BCD2BIN(spi_w8r8(spi, M41T94_REG_DAY));
- tm->tm_mon = BCD2BIN(spi_w8r8(spi, M41T94_REG_MONTH)) - 1;
- tm->tm_year = BCD2BIN(spi_w8r8(spi, M41T94_REG_YEAR));
+ tm->tm_hour = bcd2bin(hour & 0x3f);
+ tm->tm_wday = bcd2bin(spi_w8r8(spi, M41T94_REG_WDAY)) - 1;
+ tm->tm_mday = bcd2bin(spi_w8r8(spi, M41T94_REG_DAY));
+ tm->tm_mon = bcd2bin(spi_w8r8(spi, M41T94_REG_MONTH)) - 1;
+ tm->tm_year = bcd2bin(spi_w8r8(spi, M41T94_REG_YEAR));
if ((hour & M41T94_BIT_CB) || !(hour & M41T94_BIT_CEB))
tm->tm_year += 100;
diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c
index ce4eff6a8d5..04b63dab693 100644
--- a/drivers/rtc/rtc-m48t59.c
+++ b/drivers/rtc/rtc-m48t59.c
@@ -76,10 +76,10 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
/* Issue the READ command */
M48T59_SET_BITS(M48T59_CNTL_READ, M48T59_CNTL);
- tm->tm_year = BCD2BIN(M48T59_READ(M48T59_YEAR));
+ tm->tm_year = bcd2bin(M48T59_READ(M48T59_YEAR));
/* tm_mon is 0-11 */
- tm->tm_mon = BCD2BIN(M48T59_READ(M48T59_MONTH)) - 1;
- tm->tm_mday = BCD2BIN(M48T59_READ(M48T59_MDAY));
+ tm->tm_mon = bcd2bin(M48T59_READ(M48T59_MONTH)) - 1;
+ tm->tm_mday = bcd2bin(M48T59_READ(M48T59_MDAY));
val = M48T59_READ(M48T59_WDAY);
if ((pdata->type == M48T59RTC_TYPE_M48T59) &&
@@ -88,10 +88,10 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
tm->tm_year += 100; /* one century */
}
- tm->tm_wday = BCD2BIN(val & 0x07);
- tm->tm_hour = BCD2BIN(M48T59_READ(M48T59_HOUR) & 0x3F);
- tm->tm_min = BCD2BIN(M48T59_READ(M48T59_MIN) & 0x7F);
- tm->tm_sec = BCD2BIN(M48T59_READ(M48T59_SEC) & 0x7F);
+ tm->tm_wday = bcd2bin(val & 0x07);
+ tm->tm_hour = bcd2bin(M48T59_READ(M48T59_HOUR) & 0x3F);
+ tm->tm_min = bcd2bin(M48T59_READ(M48T59_MIN) & 0x7F);
+ tm->tm_sec = bcd2bin(M48T59_READ(M48T59_SEC) & 0x7F);
/* Clear the READ bit */
M48T59_CLEAR_BITS(M48T59_CNTL_READ, M48T59_CNTL);
@@ -119,17 +119,17 @@ static int m48t59_rtc_set_time(struct device *dev, struct rtc_time *tm)
/* Issue the WRITE command */
M48T59_SET_BITS(M48T59_CNTL_WRITE, M48T59_CNTL);
- M48T59_WRITE((BIN2BCD(tm->tm_sec) & 0x7F), M48T59_SEC);
- M48T59_WRITE((BIN2BCD(tm->tm_min) & 0x7F), M48T59_MIN);
- M48T59_WRITE((BIN2BCD(tm->tm_hour) & 0x3F), M48T59_HOUR);
- M48T59_WRITE((BIN2BCD(tm->tm_mday) & 0x3F), M48T59_MDAY);
+ M48T59_WRITE((bin2bcd(tm->tm_sec) & 0x7F), M48T59_SEC);
+ M48T59_WRITE((bin2bcd(tm->tm_min) & 0x7F), M48T59_MIN);
+ M48T59_WRITE((bin2bcd(tm->tm_hour) & 0x3F), M48T59_HOUR);
+ M48T59_WRITE((bin2bcd(tm->tm_mday) & 0x3F), M48T59_MDAY);
/* tm_mon is 0-11 */
- M48T59_WRITE((BIN2BCD(tm->tm_mon + 1) & 0x1F), M48T59_MONTH);
- M48T59_WRITE(BIN2BCD(tm->tm_year % 100), M48T59_YEAR);
+ M48T59_WRITE((bin2bcd(tm->tm_mon + 1) & 0x1F), M48T59_MONTH);
+ M48T59_WRITE(bin2bcd(tm->tm_year % 100), M48T59_YEAR);
if (pdata->type == M48T59RTC_TYPE_M48T59 && (tm->tm_year / 100))
val = (M48T59_WDAY_CEB | M48T59_WDAY_CB);
- val |= (BIN2BCD(tm->tm_wday) & 0x07);
+ val |= (bin2bcd(tm->tm_wday) & 0x07);
M48T59_WRITE(val, M48T59_WDAY);
/* Clear the WRITE bit */
@@ -158,18 +158,18 @@ static int m48t59_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm)
/* Issue the READ command */
M48T59_SET_BITS(M48T59_CNTL_READ, M48T59_CNTL);
- tm->tm_year = BCD2BIN(M48T59_READ(M48T59_YEAR));
+ tm->tm_year = bcd2bin(M48T59_READ(M48T59_YEAR));
/* tm_mon is 0-11 */
- tm->tm_mon = BCD2BIN(M48T59_READ(M48T59_MONTH)) - 1;
+ tm->tm_mon = bcd2bin(M48T59_READ(M48T59_MONTH)) - 1;
val = M48T59_READ(M48T59_WDAY);
if ((val & M48T59_WDAY_CEB) && (val & M48T59_WDAY_CB))
tm->tm_year += 100; /* one century */
- tm->tm_mday = BCD2BIN(M48T59_READ(M48T59_ALARM_DATE));
- tm->tm_hour = BCD2BIN(M48T59_READ(M48T59_ALARM_HOUR));
- tm->tm_min = BCD2BIN(M48T59_READ(M48T59_ALARM_MIN));
- tm->tm_sec = BCD2BIN(M48T59_READ(M48T59_ALARM_SEC));
+ tm->tm_mday = bcd2bin(M48T59_READ(M48T59_ALARM_DATE));
+ tm->tm_hour = bcd2bin(M48T59_READ(M48T59_ALARM_HOUR));
+ tm->tm_min = bcd2bin(M48T59_READ(M48T59_ALARM_MIN));
+ tm->tm_sec = bcd2bin(M48T59_READ(M48T59_ALARM_SEC));
/* Clear the READ bit */
M48T59_CLEAR_BITS(M48T59_CNTL_READ, M48T59_CNTL);
@@ -201,18 +201,18 @@ static int m48t59_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
* 0xff means "always match"
*/
mday = tm->tm_mday;
- mday = (mday >= 1 && mday <= 31) ? BIN2BCD(mday) : 0xff;
+ mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
if (mday == 0xff)
mday = M48T59_READ(M48T59_MDAY);
hour = tm->tm_hour;
- hour = (hour < 24) ? BIN2BCD(hour) : 0x00;
+ hour = (hour < 24) ? bin2bcd(hour) : 0x00;
min = tm->tm_min;
- min = (min < 60) ? BIN2BCD(min) : 0x00;
+ min = (min < 60) ? bin2bcd(min) : 0x00;
sec = tm->tm_sec;
- sec = (sec < 60) ? BIN2BCD(sec) : 0x00;
+ sec = (sec < 60) ? bin2bcd(sec) : 0x00;
spin_lock_irqsave(&m48t59->lock, flags);
/* Issue the WRITE command */
@@ -360,7 +360,6 @@ static struct bin_attribute m48t59_nvram_attr = {
.attr = {
.name = "nvram",
.mode = S_IRUGO | S_IWUSR,
- .owner = THIS_MODULE,
},
.read = m48t59_nvram_read,
.write = m48t59_nvram_write,
diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c
index 3f7f99a5d96..7c045cffa9f 100644
--- a/drivers/rtc/rtc-m48t86.c
+++ b/drivers/rtc/rtc-m48t86.c
@@ -62,14 +62,14 @@ static int m48t86_rtc_read_time(struct device *dev, struct rtc_time *tm)
tm->tm_wday = ops->readbyte(M48T86_REG_DOW);
} else {
/* bcd mode */
- tm->tm_sec = BCD2BIN(ops->readbyte(M48T86_REG_SEC));
- tm->tm_min = BCD2BIN(ops->readbyte(M48T86_REG_MIN));
- tm->tm_hour = BCD2BIN(ops->readbyte(M48T86_REG_HOUR) & 0x3F);
- tm->tm_mday = BCD2BIN(ops->readbyte(M48T86_REG_DOM));
+ tm->tm_sec = bcd2bin(ops->readbyte(M48T86_REG_SEC));
+ tm->tm_min = bcd2bin(ops->readbyte(M48T86_REG_MIN));
+ tm->tm_hour = bcd2bin(ops->readbyte(M48T86_REG_HOUR) & 0x3F);
+ tm->tm_mday = bcd2bin(ops->readbyte(M48T86_REG_DOM));
/* tm_mon is 0-11 */
- tm->tm_mon = BCD2BIN(ops->readbyte(M48T86_REG_MONTH)) - 1;
- tm->tm_year = BCD2BIN(ops->readbyte(M48T86_REG_YEAR)) + 100;
- tm->tm_wday = BCD2BIN(ops->readbyte(M48T86_REG_DOW));
+ tm->tm_mon = bcd2bin(ops->readbyte(M48T86_REG_MONTH)) - 1;
+ tm->tm_year = bcd2bin(ops->readbyte(M48T86_REG_YEAR)) + 100;
+ tm->tm_wday = bcd2bin(ops->readbyte(M48T86_REG_DOW));
}
/* correct the hour if the clock is in 12h mode */
@@ -103,13 +103,13 @@ static int m48t86_rtc_set_time(struct device *dev, struct rtc_time *tm)
ops->writebyte(tm->tm_wday, M48T86_REG_DOW);
} else {
/* bcd mode */
- ops->writebyte(BIN2BCD(tm->tm_sec), M48T86_REG_SEC);
- ops->writebyte(BIN2BCD(tm->tm_min), M48T86_REG_MIN);
- ops->writebyte(BIN2BCD(tm->tm_hour), M48T86_REG_HOUR);
- ops->writebyte(BIN2BCD(tm->tm_mday), M48T86_REG_DOM);
- ops->writebyte(BIN2BCD(tm->tm_mon + 1), M48T86_REG_MONTH);
- ops->writebyte(BIN2BCD(tm->tm_year % 100), M48T86_REG_YEAR);
- ops->writebyte(BIN2BCD(tm->tm_wday), M48T86_REG_DOW);
+ ops->writebyte(bin2bcd(tm->tm_sec), M48T86_REG_SEC);
+ ops->writebyte(bin2bcd(tm->tm_min), M48T86_REG_MIN);
+ ops->writebyte(bin2bcd(tm->tm_hour), M48T86_REG_HOUR);
+ ops->writebyte(bin2bcd(tm->tm_mday), M48T86_REG_DOM);
+ ops->writebyte(bin2bcd(tm->tm_mon + 1), M48T86_REG_MONTH);
+ ops->writebyte(bin2bcd(tm->tm_year % 100), M48T86_REG_YEAR);
+ ops->writebyte(bin2bcd(tm->tm_wday), M48T86_REG_DOW);
}
/* update ended */
diff --git a/drivers/rtc/rtc-max6900.c b/drivers/rtc/rtc-max6900.c
index 12c9cd25cad..80782798763 100644
--- a/drivers/rtc/rtc-max6900.c
+++ b/drivers/rtc/rtc-max6900.c
@@ -150,14 +150,14 @@ static int max6900_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
if (rc < 0)
return rc;
- tm->tm_sec = BCD2BIN(regs[MAX6900_REG_SC]);
- tm->tm_min = BCD2BIN(regs[MAX6900_REG_MN]);
- tm->tm_hour = BCD2BIN(regs[MAX6900_REG_HR] & 0x3f);
- tm->tm_mday = BCD2BIN(regs[MAX6900_REG_DT]);
- tm->tm_mon = BCD2BIN(regs[MAX6900_REG_MO]) - 1;
- tm->tm_year = BCD2BIN(regs[MAX6900_REG_YR]) +
- BCD2BIN(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
- tm->tm_wday = BCD2BIN(regs[MAX6900_REG_DW]);
+ tm->tm_sec = bcd2bin(regs[MAX6900_REG_SC]);
+ tm->tm_min = bcd2bin(regs[MAX6900_REG_MN]);
+ tm->tm_hour = bcd2bin(regs[MAX6900_REG_HR] & 0x3f);
+ tm->tm_mday = bcd2bin(regs[MAX6900_REG_DT]);
+ tm->tm_mon = bcd2bin(regs[MAX6900_REG_MO]) - 1;
+ tm->tm_year = bcd2bin(regs[MAX6900_REG_YR]) +
+ bcd2bin(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
+ tm->tm_wday = bcd2bin(regs[MAX6900_REG_DW]);
return 0;
}
@@ -184,14 +184,14 @@ max6900_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
if (rc < 0)
return rc;
- regs[MAX6900_REG_SC] = BIN2BCD(tm->tm_sec);
- regs[MAX6900_REG_MN] = BIN2BCD(tm->tm_min);
- regs[MAX6900_REG_HR] = BIN2BCD(tm->tm_hour);
- regs[MAX6900_REG_DT] = BIN2BCD(tm->tm_mday);
- regs[MAX6900_REG_MO] = BIN2BCD(tm->tm_mon + 1);
- regs[MAX6900_REG_DW] = BIN2BCD(tm->tm_wday);
- regs[MAX6900_REG_YR] = BIN2BCD(tm->tm_year % 100);
- regs[MAX6900_REG_CENTURY] = BIN2BCD((tm->tm_year + 1900) / 100);
+ regs[MAX6900_REG_SC] = bin2bcd(tm->tm_sec);
+ regs[MAX6900_REG_MN] = bin2bcd(tm->tm_min);
+ regs[MAX6900_REG_HR] = bin2bcd(tm->tm_hour);
+ regs[MAX6900_REG_DT] = bin2bcd(tm->tm_mday);
+ regs[MAX6900_REG_MO] = bin2bcd(tm->tm_mon + 1);
+ regs[MAX6900_REG_DW] = bin2bcd(tm->tm_wday);
+ regs[MAX6900_REG_YR] = bin2bcd(tm->tm_year % 100);
+ regs[MAX6900_REG_CENTURY] = bin2bcd((tm->tm_year + 1900) / 100);
/* set write protect */
regs[MAX6900_REG_CT] = MAX6900_REG_CT_WP;
diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c
index 78b2551fb19..2f6507df7b4 100644
--- a/drivers/rtc/rtc-max6902.c
+++ b/drivers/rtc/rtc-max6902.c
@@ -124,15 +124,15 @@ static int max6902_get_datetime(struct device *dev, struct rtc_time *dt)
/* The chip sends data in this order:
* Seconds, Minutes, Hours, Date, Month, Day, Year */
- dt->tm_sec = BCD2BIN(chip->buf[1]);
- dt->tm_min = BCD2BIN(chip->buf[2]);
- dt->tm_hour = BCD2BIN(chip->buf[3]);
- dt->tm_mday = BCD2BIN(chip->buf[4]);
- dt->tm_mon = BCD2BIN(chip->buf[5]) - 1;
- dt->tm_wday = BCD2BIN(chip->buf[6]);
- dt->tm_year = BCD2BIN(chip->buf[7]);
+ dt->tm_sec = bcd2bin(chip->buf[1]);
+ dt->tm_min = bcd2bin(chip->buf[2]);
+ dt->tm_hour = bcd2bin(chip->buf[3]);
+ dt->tm_mday = bcd2bin(chip->buf[4]);
+ dt->tm_mon = bcd2bin(chip->buf[5]) - 1;
+ dt->tm_wday = bcd2bin(chip->buf[6]);
+ dt->tm_year = bcd2bin(chip->buf[7]);
- century = BCD2BIN(tmp) * 100;
+ century = bcd2bin(tmp) * 100;
dt->tm_year += century;
dt->tm_year -= 1900;
@@ -168,15 +168,15 @@ static int max6902_set_datetime(struct device *dev, struct rtc_time *dt)
/* Remove write protection */
max6902_set_reg(dev, 0xF, 0);
- max6902_set_reg(dev, 0x01, BIN2BCD(dt->tm_sec));
- max6902_set_reg(dev, 0x03, BIN2BCD(dt->tm_min));
- max6902_set_reg(dev, 0x05, BIN2BCD(dt->tm_hour));
+ max6902_set_reg(dev, 0x01, bin2bcd(dt->tm_sec));
+ max6902_set_reg(dev, 0x03, bin2bcd(dt->tm_min));
+ max6902_set_reg(dev, 0x05, bin2bcd(dt->tm_hour));
- max6902_set_reg(dev, 0x07, BIN2BCD(dt->tm_mday));
- max6902_set_reg(dev, 0x09, BIN2BCD(dt->tm_mon+1));
- max6902_set_reg(dev, 0x0B, BIN2BCD(dt->tm_wday));
- max6902_set_reg(dev, 0x0D, BIN2BCD(dt->tm_year%100));
- max6902_set_reg(dev, 0x13, BIN2BCD(dt->tm_year/100));
+ max6902_set_reg(dev, 0x07, bin2bcd(dt->tm_mday));
+ max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon+1));
+ max6902_set_reg(dev, 0x0B, bin2bcd(dt->tm_wday));
+ max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year%100));
+ max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year/100));
/* Compulab used a delay here. However, the datasheet
* does not mention a delay being required anywhere... */
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 8876605d4d4..2cbeb0794f1 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -186,30 +186,30 @@ static int tm2bcd(struct rtc_time *tm)
if (rtc_valid_tm(tm) != 0)
return -EINVAL;
- tm->tm_sec = BIN2BCD(tm->tm_sec);
- tm->tm_min = BIN2BCD(tm->tm_min);
- tm->tm_hour = BIN2BCD(tm->tm_hour);
- tm->tm_mday = BIN2BCD(tm->tm_mday);
+ tm->tm_sec = bin2bcd(tm->tm_sec);
+ tm->tm_min = bin2bcd(tm->tm_min);
+ tm->tm_hour = bin2bcd(tm->tm_hour);
+ tm->tm_mday = bin2bcd(tm->tm_mday);
- tm->tm_mon = BIN2BCD(tm->tm_mon + 1);
+ tm->tm_mon = bin2bcd(tm->tm_mon + 1);
/* epoch == 1900 */
if (tm->tm_year < 100 || tm->tm_year > 199)
return -EINVAL;
- tm->tm_year = BIN2BCD(tm->tm_year - 100);
+ tm->tm_year = bin2bcd(tm->tm_year - 100);
return 0;
}
static void bcd2tm(struct rtc_time *tm)
{
- tm->tm_sec = BCD2BIN(tm->tm_sec);
- tm->tm_min = BCD2BIN(tm->tm_min);
- tm->tm_hour = BCD2BIN(tm->tm_hour);
- tm->tm_mday = BCD2BIN(tm->tm_mday);
- tm->tm_mon = BCD2BIN(tm->tm_mon) - 1;
+ tm->tm_sec = bcd2bin(tm->tm_sec);
+ tm->tm_min = bcd2bin(tm->tm_min);
+ tm->tm_hour = bcd2bin(tm->tm_hour);
+ tm->tm_mday = bcd2bin(tm->tm_mday);
+ tm->tm_mon = bcd2bin(tm->tm_mon) - 1;
/* epoch == 1900 */
- tm->tm_year = BCD2BIN(tm->tm_year) + 100;
+ tm->tm_year = bcd2bin(tm->tm_year) + 100;
}
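rtc-omap is the odd one out in that it converts the rtc_time fields in place, and tm2bcd() doubles as the range check: with a 1900 epoch, two BCD year digits can only cover 2000 through 2099, hence the 100-199 window on tm_year. For reference:

/* Worked examples for the omap year window:
 *	tm_year == 108  ->  bin2bcd(108 - 100) == 0x08  (year 2008)
 *	tm_year ==  99  ->  -EINVAL, before the 2000..2099 window
 *	tm_year == 200  ->  -EINVAL, past 2099
 */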
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index a829f20ad6d..b725913ccbe 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -97,13 +97,13 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
buf[8]);
- tm->tm_sec = BCD2BIN(buf[PCF8563_REG_SC] & 0x7F);
- tm->tm_min = BCD2BIN(buf[PCF8563_REG_MN] & 0x7F);
- tm->tm_hour = BCD2BIN(buf[PCF8563_REG_HR] & 0x3F); /* rtc hr 0-23 */
- tm->tm_mday = BCD2BIN(buf[PCF8563_REG_DM] & 0x3F);
+ tm->tm_sec = bcd2bin(buf[PCF8563_REG_SC] & 0x7F);
+ tm->tm_min = bcd2bin(buf[PCF8563_REG_MN] & 0x7F);
+ tm->tm_hour = bcd2bin(buf[PCF8563_REG_HR] & 0x3F); /* rtc hr 0-23 */
+ tm->tm_mday = bcd2bin(buf[PCF8563_REG_DM] & 0x3F);
tm->tm_wday = buf[PCF8563_REG_DW] & 0x07;
- tm->tm_mon = BCD2BIN(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
- tm->tm_year = BCD2BIN(buf[PCF8563_REG_YR]);
+ tm->tm_mon = bcd2bin(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
+ tm->tm_year = bcd2bin(buf[PCF8563_REG_YR]);
if (tm->tm_year < 70)
tm->tm_year += 100; /* assume we are in 1970...2069 */
/* detect the polarity heuristically. see note above. */
@@ -138,17 +138,17 @@ static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
/* hours, minutes and seconds */
- buf[PCF8563_REG_SC] = BIN2BCD(tm->tm_sec);
- buf[PCF8563_REG_MN] = BIN2BCD(tm->tm_min);
- buf[PCF8563_REG_HR] = BIN2BCD(tm->tm_hour);
+ buf[PCF8563_REG_SC] = bin2bcd(tm->tm_sec);
+ buf[PCF8563_REG_MN] = bin2bcd(tm->tm_min);
+ buf[PCF8563_REG_HR] = bin2bcd(tm->tm_hour);
- buf[PCF8563_REG_DM] = BIN2BCD(tm->tm_mday);
+ buf[PCF8563_REG_DM] = bin2bcd(tm->tm_mday);
/* month, 1 - 12 */
- buf[PCF8563_REG_MO] = BIN2BCD(tm->tm_mon + 1);
+ buf[PCF8563_REG_MO] = bin2bcd(tm->tm_mon + 1);
/* year and century */
- buf[PCF8563_REG_YR] = BIN2BCD(tm->tm_year % 100);
+ buf[PCF8563_REG_YR] = bin2bcd(tm->tm_year % 100);
if (pcf8563->c_polarity ? (tm->tm_year >= 100) : (tm->tm_year < 100))
buf[PCF8563_REG_MO] |= PCF8563_MO_C;
diff --git a/drivers/rtc/rtc-pcf8583.c b/drivers/rtc/rtc-pcf8583.c
index d388c662bf4..7d33cda3f8f 100644
--- a/drivers/rtc/rtc-pcf8583.c
+++ b/drivers/rtc/rtc-pcf8583.c
@@ -76,11 +76,11 @@ static int pcf8583_get_datetime(struct i2c_client *client, struct rtc_time *dt)
buf[4] &= 0x3f;
buf[5] &= 0x1f;
- dt->tm_sec = BCD2BIN(buf[1]);
- dt->tm_min = BCD2BIN(buf[2]);
- dt->tm_hour = BCD2BIN(buf[3]);
- dt->tm_mday = BCD2BIN(buf[4]);
- dt->tm_mon = BCD2BIN(buf[5]) - 1;
+ dt->tm_sec = bcd2bin(buf[1]);
+ dt->tm_min = bcd2bin(buf[2]);
+ dt->tm_hour = bcd2bin(buf[3]);
+ dt->tm_mday = bcd2bin(buf[4]);
+ dt->tm_mon = bcd2bin(buf[5]) - 1;
}
return ret == 2 ? 0 : -EIO;
@@ -94,14 +94,14 @@ static int pcf8583_set_datetime(struct i2c_client *client, struct rtc_time *dt,
buf[0] = 0;
buf[1] = get_ctrl(client) | 0x80;
buf[2] = 0;
- buf[3] = BIN2BCD(dt->tm_sec);
- buf[4] = BIN2BCD(dt->tm_min);
- buf[5] = BIN2BCD(dt->tm_hour);
+ buf[3] = bin2bcd(dt->tm_sec);
+ buf[4] = bin2bcd(dt->tm_min);
+ buf[5] = bin2bcd(dt->tm_hour);
if (datetoo) {
len = 8;
- buf[6] = BIN2BCD(dt->tm_mday) | (dt->tm_year << 6);
- buf[7] = BIN2BCD(dt->tm_mon + 1) | (dt->tm_wday << 5);
+ buf[6] = bin2bcd(dt->tm_mday) | (dt->tm_year << 6);
+ buf[7] = bin2bcd(dt->tm_mon + 1) | (dt->tm_wday << 5);
}
ret = i2c_master_send(client, (char *)buf, len);
diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c
index 395985b339c..42028f233be 100644
--- a/drivers/rtc/rtc-r9701.c
+++ b/drivers/rtc/rtc-r9701.c
@@ -80,13 +80,13 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt)
memset(dt, 0, sizeof(*dt));
- dt->tm_sec = BCD2BIN(buf[0]); /* RSECCNT */
- dt->tm_min = BCD2BIN(buf[1]); /* RMINCNT */
- dt->tm_hour = BCD2BIN(buf[2]); /* RHRCNT */
+ dt->tm_sec = bcd2bin(buf[0]); /* RSECCNT */
+ dt->tm_min = bcd2bin(buf[1]); /* RMINCNT */
+ dt->tm_hour = bcd2bin(buf[2]); /* RHRCNT */
- dt->tm_mday = BCD2BIN(buf[3]); /* RDAYCNT */
- dt->tm_mon = BCD2BIN(buf[4]) - 1; /* RMONCNT */
- dt->tm_year = BCD2BIN(buf[5]) + 100; /* RYRCNT */
+ dt->tm_mday = bcd2bin(buf[3]); /* RDAYCNT */
+ dt->tm_mon = bcd2bin(buf[4]) - 1; /* RMONCNT */
+ dt->tm_year = bcd2bin(buf[5]) + 100; /* RYRCNT */
/* the rtc device may contain illegal values on power up
* according to the data sheet. make sure they are valid.
@@ -103,12 +103,12 @@ static int r9701_set_datetime(struct device *dev, struct rtc_time *dt)
if (year >= 2100 || year < 2000)
return -EINVAL;
- ret = write_reg(dev, RHRCNT, BIN2BCD(dt->tm_hour));
- ret = ret ? ret : write_reg(dev, RMINCNT, BIN2BCD(dt->tm_min));
- ret = ret ? ret : write_reg(dev, RSECCNT, BIN2BCD(dt->tm_sec));
- ret = ret ? ret : write_reg(dev, RDAYCNT, BIN2BCD(dt->tm_mday));
- ret = ret ? ret : write_reg(dev, RMONCNT, BIN2BCD(dt->tm_mon + 1));
- ret = ret ? ret : write_reg(dev, RYRCNT, BIN2BCD(dt->tm_year - 100));
+ ret = write_reg(dev, RHRCNT, bin2bcd(dt->tm_hour));
+ ret = ret ? ret : write_reg(dev, RMINCNT, bin2bcd(dt->tm_min));
+ ret = ret ? ret : write_reg(dev, RSECCNT, bin2bcd(dt->tm_sec));
+ ret = ret ? ret : write_reg(dev, RDAYCNT, bin2bcd(dt->tm_mday));
+ ret = ret ? ret : write_reg(dev, RMONCNT, bin2bcd(dt->tm_mon + 1));
+ ret = ret ? ret : write_reg(dev, RYRCNT, bin2bcd(dt->tm_year - 100));
ret = ret ? ret : write_reg(dev, RWKCNT, 1 << dt->tm_wday);
return ret;
diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c
index 1c14d4497c4..e6ea3f5ee1e 100644
--- a/drivers/rtc/rtc-rs5c313.c
+++ b/drivers/rtc/rtc-rs5c313.c
@@ -235,33 +235,33 @@ static int rs5c313_rtc_read_time(struct device *dev, struct rtc_time *tm)
data = rs5c313_read_reg(RS5C313_ADDR_SEC);
data |= (rs5c313_read_reg(RS5C313_ADDR_SEC10) << 4);
- tm->tm_sec = BCD2BIN(data);
+ tm->tm_sec = bcd2bin(data);
data = rs5c313_read_reg(RS5C313_ADDR_MIN);
data |= (rs5c313_read_reg(RS5C313_ADDR_MIN10) << 4);
- tm->tm_min = BCD2BIN(data);
+ tm->tm_min = bcd2bin(data);
data = rs5c313_read_reg(RS5C313_ADDR_HOUR);
data |= (rs5c313_read_reg(RS5C313_ADDR_HOUR10) << 4);
- tm->tm_hour = BCD2BIN(data);
+ tm->tm_hour = bcd2bin(data);
data = rs5c313_read_reg(RS5C313_ADDR_DAY);
data |= (rs5c313_read_reg(RS5C313_ADDR_DAY10) << 4);
- tm->tm_mday = BCD2BIN(data);
+ tm->tm_mday = bcd2bin(data);
data = rs5c313_read_reg(RS5C313_ADDR_MON);
data |= (rs5c313_read_reg(RS5C313_ADDR_MON10) << 4);
- tm->tm_mon = BCD2BIN(data) - 1;
+ tm->tm_mon = bcd2bin(data) - 1;
data = rs5c313_read_reg(RS5C313_ADDR_YEAR);
data |= (rs5c313_read_reg(RS5C313_ADDR_YEAR10) << 4);
- tm->tm_year = BCD2BIN(data);
+ tm->tm_year = bcd2bin(data);
if (tm->tm_year < 70)
tm->tm_year += 100;
data = rs5c313_read_reg(RS5C313_ADDR_WEEK);
- tm->tm_wday = BCD2BIN(data);
+ tm->tm_wday = bcd2bin(data);
RS5C313_CEDISABLE;
ndelay(700); /* CE:L */
@@ -294,31 +294,31 @@ static int rs5c313_rtc_set_time(struct device *dev, struct rtc_time *tm)
}
}
- data = BIN2BCD(tm->tm_sec);
+ data = bin2bcd(tm->tm_sec);
rs5c313_write_reg(RS5C313_ADDR_SEC, data);
rs5c313_write_reg(RS5C313_ADDR_SEC10, (data >> 4));
- data = BIN2BCD(tm->tm_min);
+ data = bin2bcd(tm->tm_min);
rs5c313_write_reg(RS5C313_ADDR_MIN, data );
rs5c313_write_reg(RS5C313_ADDR_MIN10, (data >> 4));
- data = BIN2BCD(tm->tm_hour);
+ data = bin2bcd(tm->tm_hour);
rs5c313_write_reg(RS5C313_ADDR_HOUR, data);
rs5c313_write_reg(RS5C313_ADDR_HOUR10, (data >> 4));
- data = BIN2BCD(tm->tm_mday);
+ data = bin2bcd(tm->tm_mday);
rs5c313_write_reg(RS5C313_ADDR_DAY, data);
rs5c313_write_reg(RS5C313_ADDR_DAY10, (data>> 4));
- data = BIN2BCD(tm->tm_mon + 1);
+ data = bin2bcd(tm->tm_mon + 1);
rs5c313_write_reg(RS5C313_ADDR_MON, data);
rs5c313_write_reg(RS5C313_ADDR_MON10, (data >> 4));
- data = BIN2BCD(tm->tm_year % 100);
+ data = bin2bcd(tm->tm_year % 100);
rs5c313_write_reg(RS5C313_ADDR_YEAR, data);
rs5c313_write_reg(RS5C313_ADDR_YEAR10, (data >> 4));
- data = BIN2BCD(tm->tm_wday);
+ data = bin2bcd(tm->tm_wday);
rs5c313_write_reg(RS5C313_ADDR_WEEK, data);
RS5C313_CEDISABLE; /* CE:H */
diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c
index 839462659af..dd1e2bc7a47 100644
--- a/drivers/rtc/rtc-rs5c348.c
+++ b/drivers/rtc/rtc-rs5c348.c
@@ -74,20 +74,20 @@ rs5c348_rtc_set_time(struct device *dev, struct rtc_time *tm)
txbuf[3] = 0; /* dummy */
txbuf[4] = RS5C348_CMD_MW(RS5C348_REG_SECS); /* cmd, sec, ... */
txp = &txbuf[5];
- txp[RS5C348_REG_SECS] = BIN2BCD(tm->tm_sec);
- txp[RS5C348_REG_MINS] = BIN2BCD(tm->tm_min);
+ txp[RS5C348_REG_SECS] = bin2bcd(tm->tm_sec);
+ txp[RS5C348_REG_MINS] = bin2bcd(tm->tm_min);
if (pdata->rtc_24h) {
- txp[RS5C348_REG_HOURS] = BIN2BCD(tm->tm_hour);
+ txp[RS5C348_REG_HOURS] = bin2bcd(tm->tm_hour);
} else {
/* hour 0 is AM12, noon is PM12 */
- txp[RS5C348_REG_HOURS] = BIN2BCD((tm->tm_hour + 11) % 12 + 1) |
+ txp[RS5C348_REG_HOURS] = bin2bcd((tm->tm_hour + 11) % 12 + 1) |
(tm->tm_hour >= 12 ? RS5C348_BIT_PM : 0);
}
- txp[RS5C348_REG_WDAY] = BIN2BCD(tm->tm_wday);
- txp[RS5C348_REG_DAY] = BIN2BCD(tm->tm_mday);
- txp[RS5C348_REG_MONTH] = BIN2BCD(tm->tm_mon + 1) |
+ txp[RS5C348_REG_WDAY] = bin2bcd(tm->tm_wday);
+ txp[RS5C348_REG_DAY] = bin2bcd(tm->tm_mday);
+ txp[RS5C348_REG_MONTH] = bin2bcd(tm->tm_mon + 1) |
(tm->tm_year >= 100 ? RS5C348_BIT_Y2K : 0);
- txp[RS5C348_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+ txp[RS5C348_REG_YEAR] = bin2bcd(tm->tm_year % 100);
/* write in one transfer to avoid data inconsistency */
ret = spi_write_then_read(spi, txbuf, sizeof(txbuf), NULL, 0);
udelay(62); /* Tcsr 62us */
@@ -116,20 +116,20 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm)
if (ret < 0)
return ret;
- tm->tm_sec = BCD2BIN(rxbuf[RS5C348_REG_SECS] & RS5C348_SECS_MASK);
- tm->tm_min = BCD2BIN(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
- tm->tm_hour = BCD2BIN(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
+ tm->tm_sec = bcd2bin(rxbuf[RS5C348_REG_SECS] & RS5C348_SECS_MASK);
+ tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
+ tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
if (!pdata->rtc_24h) {
tm->tm_hour %= 12;
if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM)
tm->tm_hour += 12;
}
- tm->tm_wday = BCD2BIN(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
- tm->tm_mday = BCD2BIN(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
+ tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
+ tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
tm->tm_mon =
- BCD2BIN(rxbuf[RS5C348_REG_MONTH] & RS5C348_MONTH_MASK) - 1;
+ bcd2bin(rxbuf[RS5C348_REG_MONTH] & RS5C348_MONTH_MASK) - 1;
/* year is 1900 + tm->tm_year */
- tm->tm_year = BCD2BIN(rxbuf[RS5C348_REG_YEAR]) +
+ tm->tm_year = bcd2bin(rxbuf[RS5C348_REG_YEAR]) +
((rxbuf[RS5C348_REG_MONTH] & RS5C348_BIT_Y2K) ? 100 : 0);
if (rtc_valid_tm(tm) < 0) {
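The rs5c348 write path above hides a nice trick: (tm_hour + 11) % 12 + 1 folds the 0-23 range onto the chip's 1-12 dial, so hour 0 becomes AM 12 and hour 12 becomes PM 12, with the PM flag ORed in separately. As a sketch (the function name is illustrative):

/* Illustrative: encode a 0-23 hour for a 1-12 dial plus PM flag. */
static unsigned char hour_to_12h_reg(int hour)
{
	unsigned char reg = bin2bcd((hour + 11) % 12 + 1);

	if (hour >= 12)			/* e.g.  0 -> 0x12 (AM 12)   */
		reg |= RS5C348_BIT_PM;	/*      12 -> PM | 0x12      */
	return reg;			/*      13 -> PM | 0x01      */
}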
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 8b561958fb1..2f2c68d476d 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -148,9 +148,9 @@ static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
unsigned hour;
if (rs5c->time24)
- return BCD2BIN(reg & 0x3f);
+ return bcd2bin(reg & 0x3f);
- hour = BCD2BIN(reg & 0x1f);
+ hour = bcd2bin(reg & 0x1f);
if (hour == 12)
hour = 0;
if (reg & 0x20)
@@ -161,15 +161,15 @@ static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
static unsigned rs5c_hr2reg(struct rs5c372 *rs5c, unsigned hour)
{
if (rs5c->time24)
- return BIN2BCD(hour);
+ return bin2bcd(hour);
if (hour > 12)
- return 0x20 | BIN2BCD(hour - 12);
+ return 0x20 | bin2bcd(hour - 12);
if (hour == 12)
- return 0x20 | BIN2BCD(12);
+ return 0x20 | bin2bcd(12);
if (hour == 0)
- return BIN2BCD(12);
- return BIN2BCD(hour);
+ return bin2bcd(12);
+ return bin2bcd(hour);
}
static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
@@ -180,18 +180,18 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
if (status < 0)
return status;
- tm->tm_sec = BCD2BIN(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
- tm->tm_min = BCD2BIN(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
+ tm->tm_sec = bcd2bin(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
+ tm->tm_min = bcd2bin(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
tm->tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C372_REG_HOURS]);
- tm->tm_wday = BCD2BIN(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
- tm->tm_mday = BCD2BIN(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
+ tm->tm_wday = bcd2bin(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
+ tm->tm_mday = bcd2bin(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
/* tm->tm_mon is zero-based */
- tm->tm_mon = BCD2BIN(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
+ tm->tm_mon = bcd2bin(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
/* year is 1900 + tm->tm_year */
- tm->tm_year = BCD2BIN(rs5c->regs[RS5C372_REG_YEAR]) + 100;
+ tm->tm_year = bcd2bin(rs5c->regs[RS5C372_REG_YEAR]) + 100;
dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
"mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -216,13 +216,13 @@ static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
addr = RS5C_ADDR(RS5C372_REG_SECS);
- buf[0] = BIN2BCD(tm->tm_sec);
- buf[1] = BIN2BCD(tm->tm_min);
+ buf[0] = bin2bcd(tm->tm_sec);
+ buf[1] = bin2bcd(tm->tm_min);
buf[2] = rs5c_hr2reg(rs5c, tm->tm_hour);
- buf[3] = BIN2BCD(tm->tm_wday);
- buf[4] = BIN2BCD(tm->tm_mday);
- buf[5] = BIN2BCD(tm->tm_mon + 1);
- buf[6] = BIN2BCD(tm->tm_year - 100);
+ buf[3] = bin2bcd(tm->tm_wday);
+ buf[4] = bin2bcd(tm->tm_mday);
+ buf[5] = bin2bcd(tm->tm_mon + 1);
+ buf[6] = bin2bcd(tm->tm_year - 100);
if (i2c_smbus_write_i2c_block_data(client, addr, sizeof(buf), buf) < 0) {
dev_err(&client->dev, "%s: write error\n", __func__);
@@ -367,7 +367,7 @@ static int rs5c_read_alarm(struct device *dev, struct rtc_wkalrm *t)
/* report alarm time */
t->time.tm_sec = 0;
- t->time.tm_min = BCD2BIN(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
+ t->time.tm_min = bcd2bin(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]);
t->time.tm_mday = -1;
t->time.tm_mon = -1;
@@ -413,7 +413,7 @@ static int rs5c_set_alarm(struct device *dev, struct rtc_wkalrm *t)
}
/* set alarm */
- buf[0] = BIN2BCD(t->time.tm_min);
+ buf[0] = bin2bcd(t->time.tm_min);
buf[1] = rs5c_hr2reg(rs5c, t->time.tm_hour);
buf[2] = 0x7f; /* any/all days */
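rs5c_hr2reg() above solves the same 12-hour problem without the modular trick, special-casing noon and midnight instead; the resulting encodings, for comparison:

/* rs5c372 12-hour encoding, per rs5c_hr2reg():
 *	hour  0 -> 0x12		(12 AM)
 *	hour  5 -> 0x05		( 5 AM)
 *	hour 12 -> 0x20 | 0x12	(12 PM)
 *	hour 13 -> 0x20 | 0x01	( 1 PM)
 */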
diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index a6fa1f2f2ca..def4d396d0b 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -104,12 +104,12 @@ static int s35390a_disable_test_mode(struct s35390a *s35390a)
static char s35390a_hr2reg(struct s35390a *s35390a, int hour)
{
if (s35390a->twentyfourhour)
- return BIN2BCD(hour);
+ return bin2bcd(hour);
if (hour < 12)
- return BIN2BCD(hour);
+ return bin2bcd(hour);
- return 0x40 | BIN2BCD(hour - 12);
+ return 0x40 | bin2bcd(hour - 12);
}
static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
@@ -117,9 +117,9 @@ static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
unsigned hour;
if (s35390a->twentyfourhour)
- return BCD2BIN(reg & 0x3f);
+ return bcd2bin(reg & 0x3f);
- hour = BCD2BIN(reg & 0x3f);
+ hour = bcd2bin(reg & 0x3f);
if (reg & 0x40)
hour += 12;
@@ -137,13 +137,13 @@ static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm)
tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year,
tm->tm_wday);
- buf[S35390A_BYTE_YEAR] = BIN2BCD(tm->tm_year - 100);
- buf[S35390A_BYTE_MONTH] = BIN2BCD(tm->tm_mon + 1);
- buf[S35390A_BYTE_DAY] = BIN2BCD(tm->tm_mday);
- buf[S35390A_BYTE_WDAY] = BIN2BCD(tm->tm_wday);
+ buf[S35390A_BYTE_YEAR] = bin2bcd(tm->tm_year - 100);
+ buf[S35390A_BYTE_MONTH] = bin2bcd(tm->tm_mon + 1);
+ buf[S35390A_BYTE_DAY] = bin2bcd(tm->tm_mday);
+ buf[S35390A_BYTE_WDAY] = bin2bcd(tm->tm_wday);
buf[S35390A_BYTE_HOURS] = s35390a_hr2reg(s35390a, tm->tm_hour);
- buf[S35390A_BYTE_MINS] = BIN2BCD(tm->tm_min);
- buf[S35390A_BYTE_SECS] = BIN2BCD(tm->tm_sec);
+ buf[S35390A_BYTE_MINS] = bin2bcd(tm->tm_min);
+ buf[S35390A_BYTE_SECS] = bin2bcd(tm->tm_sec);
/* This chip expects the bits of each byte to be in reverse order */
for (i = 0; i < 7; ++i)
@@ -168,13 +168,13 @@ static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm)
for (i = 0; i < 7; ++i)
buf[i] = bitrev8(buf[i]);
- tm->tm_sec = BCD2BIN(buf[S35390A_BYTE_SECS]);
- tm->tm_min = BCD2BIN(buf[S35390A_BYTE_MINS]);
+ tm->tm_sec = bcd2bin(buf[S35390A_BYTE_SECS]);
+ tm->tm_min = bcd2bin(buf[S35390A_BYTE_MINS]);
tm->tm_hour = s35390a_reg2hr(s35390a, buf[S35390A_BYTE_HOURS]);
- tm->tm_wday = BCD2BIN(buf[S35390A_BYTE_WDAY]);
- tm->tm_mday = BCD2BIN(buf[S35390A_BYTE_DAY]);
- tm->tm_mon = BCD2BIN(buf[S35390A_BYTE_MONTH]) - 1;
- tm->tm_year = BCD2BIN(buf[S35390A_BYTE_YEAR]) + 100;
+ tm->tm_wday = bcd2bin(buf[S35390A_BYTE_WDAY]);
+ tm->tm_mday = bcd2bin(buf[S35390A_BYTE_DAY]);
+ tm->tm_mon = bcd2bin(buf[S35390A_BYTE_MONTH]) - 1;
+ tm->tm_year = bcd2bin(buf[S35390A_BYTE_YEAR]) + 100;
dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, mday=%d, "
"mon=%d, year=%d, wday=%d\n", __func__, tm->tm_sec,
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index e7d19b6c265..910bc704939 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -134,12 +134,12 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
- BCD_TO_BIN(rtc_tm->tm_sec);
- BCD_TO_BIN(rtc_tm->tm_min);
- BCD_TO_BIN(rtc_tm->tm_hour);
- BCD_TO_BIN(rtc_tm->tm_mday);
- BCD_TO_BIN(rtc_tm->tm_mon);
- BCD_TO_BIN(rtc_tm->tm_year);
+ rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+ rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+ rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+ rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+ rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+ rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
rtc_tm->tm_year += 100;
rtc_tm->tm_mon -= 1;
@@ -163,12 +163,12 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
return -EINVAL;
}
- writeb(BIN2BCD(tm->tm_sec), base + S3C2410_RTCSEC);
- writeb(BIN2BCD(tm->tm_min), base + S3C2410_RTCMIN);
- writeb(BIN2BCD(tm->tm_hour), base + S3C2410_RTCHOUR);
- writeb(BIN2BCD(tm->tm_mday), base + S3C2410_RTCDATE);
- writeb(BIN2BCD(tm->tm_mon + 1), base + S3C2410_RTCMON);
- writeb(BIN2BCD(year), base + S3C2410_RTCYEAR);
+ writeb(bin2bcd(tm->tm_sec), base + S3C2410_RTCSEC);
+ writeb(bin2bcd(tm->tm_min), base + S3C2410_RTCMIN);
+ writeb(bin2bcd(tm->tm_hour), base + S3C2410_RTCHOUR);
+ writeb(bin2bcd(tm->tm_mday), base + S3C2410_RTCDATE);
+ writeb(bin2bcd(tm->tm_mon + 1), base + S3C2410_RTCMON);
+ writeb(bin2bcd(year), base + S3C2410_RTCYEAR);
return 0;
}
@@ -199,34 +199,34 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
/* decode the alarm enable field */
if (alm_en & S3C2410_RTCALM_SECEN)
- BCD_TO_BIN(alm_tm->tm_sec);
+ alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec);
else
alm_tm->tm_sec = 0xff;
if (alm_en & S3C2410_RTCALM_MINEN)
- BCD_TO_BIN(alm_tm->tm_min);
+ alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
else
alm_tm->tm_min = 0xff;
if (alm_en & S3C2410_RTCALM_HOUREN)
- BCD_TO_BIN(alm_tm->tm_hour);
+ alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
else
alm_tm->tm_hour = 0xff;
if (alm_en & S3C2410_RTCALM_DAYEN)
- BCD_TO_BIN(alm_tm->tm_mday);
+ alm_tm->tm_mday = bcd2bin(alm_tm->tm_mday);
else
alm_tm->tm_mday = 0xff;
if (alm_en & S3C2410_RTCALM_MONEN) {
- BCD_TO_BIN(alm_tm->tm_mon);
+ alm_tm->tm_mon = bcd2bin(alm_tm->tm_mon);
alm_tm->tm_mon -= 1;
} else {
alm_tm->tm_mon = 0xff;
}
if (alm_en & S3C2410_RTCALM_YEAREN)
- BCD_TO_BIN(alm_tm->tm_year);
+ alm_tm->tm_year = bcd2bin(alm_tm->tm_year);
else
alm_tm->tm_year = 0xffff;
@@ -250,17 +250,17 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
if (tm->tm_sec < 60 && tm->tm_sec >= 0) {
alrm_en |= S3C2410_RTCALM_SECEN;
- writeb(BIN2BCD(tm->tm_sec), base + S3C2410_ALMSEC);
+ writeb(bin2bcd(tm->tm_sec), base + S3C2410_ALMSEC);
}
if (tm->tm_min < 60 && tm->tm_min >= 0) {
alrm_en |= S3C2410_RTCALM_MINEN;
- writeb(BIN2BCD(tm->tm_min), base + S3C2410_ALMMIN);
+ writeb(bin2bcd(tm->tm_min), base + S3C2410_ALMMIN);
}
if (tm->tm_hour < 24 && tm->tm_hour >= 0) {
alrm_en |= S3C2410_RTCALM_HOUREN;
- writeb(BIN2BCD(tm->tm_hour), base + S3C2410_ALMHOUR);
+ writeb(bin2bcd(tm->tm_hour), base + S3C2410_ALMHOUR);
}
pr_debug("setting S3C2410_RTCALM to %08x\n", alrm_en);
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 3f393c82e32..aaf9d6a337c 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -324,23 +324,23 @@ static int sh_rtc_read_time(struct device *dev, struct rtc_time *tm)
sec128 = readb(rtc->regbase + R64CNT);
- tm->tm_sec = BCD2BIN(readb(rtc->regbase + RSECCNT));
- tm->tm_min = BCD2BIN(readb(rtc->regbase + RMINCNT));
- tm->tm_hour = BCD2BIN(readb(rtc->regbase + RHRCNT));
- tm->tm_wday = BCD2BIN(readb(rtc->regbase + RWKCNT));
- tm->tm_mday = BCD2BIN(readb(rtc->regbase + RDAYCNT));
- tm->tm_mon = BCD2BIN(readb(rtc->regbase + RMONCNT)) - 1;
+ tm->tm_sec = bcd2bin(readb(rtc->regbase + RSECCNT));
+ tm->tm_min = bcd2bin(readb(rtc->regbase + RMINCNT));
+ tm->tm_hour = bcd2bin(readb(rtc->regbase + RHRCNT));
+ tm->tm_wday = bcd2bin(readb(rtc->regbase + RWKCNT));
+ tm->tm_mday = bcd2bin(readb(rtc->regbase + RDAYCNT));
+ tm->tm_mon = bcd2bin(readb(rtc->regbase + RMONCNT)) - 1;
if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
yr = readw(rtc->regbase + RYRCNT);
- yr100 = BCD2BIN(yr >> 8);
+ yr100 = bcd2bin(yr >> 8);
yr &= 0xff;
} else {
yr = readb(rtc->regbase + RYRCNT);
- yr100 = BCD2BIN((yr == 0x99) ? 0x19 : 0x20);
+ yr100 = bcd2bin((yr == 0x99) ? 0x19 : 0x20);
}
- tm->tm_year = (yr100 * 100 + BCD2BIN(yr)) - 1900;
+ tm->tm_year = (yr100 * 100 + bcd2bin(yr)) - 1900;
sec2 = readb(rtc->regbase + R64CNT);
cf_bit = readb(rtc->regbase + RCR1) & RCR1_CF;
@@ -382,20 +382,20 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm)
tmp &= ~RCR2_START;
writeb(tmp, rtc->regbase + RCR2);
- writeb(BIN2BCD(tm->tm_sec), rtc->regbase + RSECCNT);
- writeb(BIN2BCD(tm->tm_min), rtc->regbase + RMINCNT);
- writeb(BIN2BCD(tm->tm_hour), rtc->regbase + RHRCNT);
- writeb(BIN2BCD(tm->tm_wday), rtc->regbase + RWKCNT);
- writeb(BIN2BCD(tm->tm_mday), rtc->regbase + RDAYCNT);
- writeb(BIN2BCD(tm->tm_mon + 1), rtc->regbase + RMONCNT);
+ writeb(bin2bcd(tm->tm_sec), rtc->regbase + RSECCNT);
+ writeb(bin2bcd(tm->tm_min), rtc->regbase + RMINCNT);
+ writeb(bin2bcd(tm->tm_hour), rtc->regbase + RHRCNT);
+ writeb(bin2bcd(tm->tm_wday), rtc->regbase + RWKCNT);
+ writeb(bin2bcd(tm->tm_mday), rtc->regbase + RDAYCNT);
+ writeb(bin2bcd(tm->tm_mon + 1), rtc->regbase + RMONCNT);
if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
- year = (BIN2BCD((tm->tm_year + 1900) / 100) << 8) |
- BIN2BCD(tm->tm_year % 100);
+ year = (bin2bcd((tm->tm_year + 1900) / 100) << 8) |
+ bin2bcd(tm->tm_year % 100);
writew(year, rtc->regbase + RYRCNT);
} else {
year = tm->tm_year % 100;
- writeb(BIN2BCD(year), rtc->regbase + RYRCNT);
+ writeb(bin2bcd(year), rtc->regbase + RYRCNT);
}
/* Start RTC */
@@ -417,7 +417,7 @@ static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off)
byte = readb(rtc->regbase + reg_off);
if (byte & AR_ENB) {
byte &= ~AR_ENB; /* strip the enable bit */
- value = BCD2BIN(byte);
+ value = bcd2bin(byte);
}
return value;
@@ -455,7 +455,7 @@ static inline void sh_rtc_write_alarm_value(struct sh_rtc *rtc,
if (value < 0)
writeb(0, rtc->regbase + reg_off);
else
- writeb(BIN2BCD(value) | AR_ENB, rtc->regbase + reg_off);
+ writeb(bin2bcd(value) | AR_ENB, rtc->regbase + reg_off);
}
static int sh_rtc_check_alarm(struct rtc_time *tm)
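
On SH parts with RTC_CAP_4_DIGIT_YEAR, the RYRCNT register packs the century and the two-digit year as separate BCD bytes. A worked example of the packing used in sh_rtc_set_time() above, with bin2bcd() re-sketched so it compiles standalone:

#include <stdio.h>

static unsigned char bin2bcd(unsigned int val)
{
        return ((val / 10) << 4) | (val % 10);
}

int main(void)
{
        int tm_year = 108;      /* 2008, counted from 1900 */
        unsigned short year = (bin2bcd((tm_year + 1900) / 100) << 8) |
                              bin2bcd(tm_year % 100);

        printf("RYRCNT = 0x%04x\n", year);  /* 0x2008: one digit per nibble */
        return 0;
}
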
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index 9a7e920315f..f4cd46e15af 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -82,14 +82,14 @@ static int stk17ta8_rtc_set_time(struct device *dev, struct rtc_time *tm)
flags = readb(pdata->ioaddr + RTC_FLAGS);
writeb(flags | RTC_WRITE, pdata->ioaddr + RTC_FLAGS);
- writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
- writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
- writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
- writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
- writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
- writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
- writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
- writeb(BIN2BCD((tm->tm_year + 1900) / 100), ioaddr + RTC_CENTURY);
+ writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+ writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+ writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+ writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+ writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+ writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+ writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+ writeb(bin2bcd((tm->tm_year + 1900) / 100), ioaddr + RTC_CENTURY);
writeb(flags & ~RTC_WRITE, pdata->ioaddr + RTC_FLAGS);
return 0;
@@ -120,14 +120,14 @@ static int stk17ta8_rtc_read_time(struct device *dev, struct rtc_time *tm)
year = readb(ioaddr + RTC_YEAR);
century = readb(ioaddr + RTC_CENTURY);
writeb(flags & ~RTC_READ, ioaddr + RTC_FLAGS);
- tm->tm_sec = BCD2BIN(second);
- tm->tm_min = BCD2BIN(minute);
- tm->tm_hour = BCD2BIN(hour);
- tm->tm_mday = BCD2BIN(day);
- tm->tm_wday = BCD2BIN(week);
- tm->tm_mon = BCD2BIN(month) - 1;
+ tm->tm_sec = bcd2bin(second);
+ tm->tm_min = bcd2bin(minute);
+ tm->tm_hour = bcd2bin(hour);
+ tm->tm_mday = bcd2bin(day);
+ tm->tm_wday = bcd2bin(week);
+ tm->tm_mon = bcd2bin(month) - 1;
/* year is 1900 + tm->tm_year */
- tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+ tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
if (rtc_valid_tm(tm) < 0) {
dev_err(dev, "retrieved date/time is not valid.\n");
@@ -148,16 +148,16 @@ static void stk17ta8_rtc_update_alarm(struct rtc_plat_data *pdata)
writeb(flags | RTC_WRITE, ioaddr + RTC_FLAGS);
writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_mday),
+ 0x80 : bin2bcd(pdata->alrm_mday),
ioaddr + RTC_DATE_ALARM);
writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_hour),
+ 0x80 : bin2bcd(pdata->alrm_hour),
ioaddr + RTC_HOURS_ALARM);
writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_min),
+ 0x80 : bin2bcd(pdata->alrm_min),
ioaddr + RTC_MINUTES_ALARM);
writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
- 0x80 : BIN2BCD(pdata->alrm_sec),
+ 0x80 : bin2bcd(pdata->alrm_sec),
ioaddr + RTC_SECONDS_ALARM);
writeb(pdata->irqen ? RTC_INTS_AIE : 0, ioaddr + RTC_INTERRUPTS);
readb(ioaddr + RTC_FLAGS); /* clear interrupts */
@@ -280,7 +280,6 @@ static struct bin_attribute stk17ta8_nvram_attr = {
.attr = {
.name = "nvram",
.mode = S_IRUGO | S_IWUSR,
- .owner = THIS_MODULE,
},
.size = RTC_OFFSET,
.read = stk17ta8_nvram_read,
diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c
index 10025d84026..14d4f036a76 100644
--- a/drivers/rtc/rtc-v3020.c
+++ b/drivers/rtc/rtc-v3020.c
@@ -92,19 +92,19 @@ static int v3020_read_time(struct device *dev, struct rtc_time *dt)
/* ...and then read constant values. */
tmp = v3020_get_reg(chip, V3020_SECONDS);
- dt->tm_sec = BCD2BIN(tmp);
+ dt->tm_sec = bcd2bin(tmp);
tmp = v3020_get_reg(chip, V3020_MINUTES);
- dt->tm_min = BCD2BIN(tmp);
+ dt->tm_min = bcd2bin(tmp);
tmp = v3020_get_reg(chip, V3020_HOURS);
- dt->tm_hour = BCD2BIN(tmp);
+ dt->tm_hour = bcd2bin(tmp);
tmp = v3020_get_reg(chip, V3020_MONTH_DAY);
- dt->tm_mday = BCD2BIN(tmp);
+ dt->tm_mday = bcd2bin(tmp);
tmp = v3020_get_reg(chip, V3020_MONTH);
- dt->tm_mon = BCD2BIN(tmp) - 1;
+ dt->tm_mon = bcd2bin(tmp) - 1;
tmp = v3020_get_reg(chip, V3020_WEEK_DAY);
- dt->tm_wday = BCD2BIN(tmp);
+ dt->tm_wday = bcd2bin(tmp);
tmp = v3020_get_reg(chip, V3020_YEAR);
- dt->tm_year = BCD2BIN(tmp)+100;
+ dt->tm_year = bcd2bin(tmp)+100;
#ifdef DEBUG
printk("\n%s : Read RTC values\n",__func__);
@@ -136,13 +136,13 @@ static int v3020_set_time(struct device *dev, struct rtc_time *dt)
#endif
/* Write all the values to ram... */
- v3020_set_reg(chip, V3020_SECONDS, BIN2BCD(dt->tm_sec));
- v3020_set_reg(chip, V3020_MINUTES, BIN2BCD(dt->tm_min));
- v3020_set_reg(chip, V3020_HOURS, BIN2BCD(dt->tm_hour));
- v3020_set_reg(chip, V3020_MONTH_DAY, BIN2BCD(dt->tm_mday));
- v3020_set_reg(chip, V3020_MONTH, BIN2BCD(dt->tm_mon + 1));
- v3020_set_reg(chip, V3020_WEEK_DAY, BIN2BCD(dt->tm_wday));
- v3020_set_reg(chip, V3020_YEAR, BIN2BCD(dt->tm_year % 100));
+ v3020_set_reg(chip, V3020_SECONDS, bin2bcd(dt->tm_sec));
+ v3020_set_reg(chip, V3020_MINUTES, bin2bcd(dt->tm_min));
+ v3020_set_reg(chip, V3020_HOURS, bin2bcd(dt->tm_hour));
+ v3020_set_reg(chip, V3020_MONTH_DAY, bin2bcd(dt->tm_mday));
+ v3020_set_reg(chip, V3020_MONTH, bin2bcd(dt->tm_mon + 1));
+ v3020_set_reg(chip, V3020_WEEK_DAY, bin2bcd(dt->tm_wday));
+ v3020_set_reg(chip, V3020_YEAR, bin2bcd(dt->tm_year % 100));
/* ...and set the clock. */
v3020_set_reg(chip, V3020_CMD_RAM2CLOCK, 0);
diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c
index 7dcfba1bbfe..310c10795e9 100644
--- a/drivers/rtc/rtc-x1205.c
+++ b/drivers/rtc/rtc-x1205.c
@@ -118,13 +118,13 @@ static int x1205_get_datetime(struct i2c_client *client, struct rtc_time *tm,
for (i = 0; i <= 4; i++)
buf[i] &= 0x7F;
- tm->tm_sec = BCD2BIN(buf[CCR_SEC]);
- tm->tm_min = BCD2BIN(buf[CCR_MIN]);
- tm->tm_hour = BCD2BIN(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */
- tm->tm_mday = BCD2BIN(buf[CCR_MDAY]);
- tm->tm_mon = BCD2BIN(buf[CCR_MONTH]) - 1; /* mon is 0-11 */
- tm->tm_year = BCD2BIN(buf[CCR_YEAR])
- + (BCD2BIN(buf[CCR_Y2K]) * 100) - 1900;
+ tm->tm_sec = bcd2bin(buf[CCR_SEC]);
+ tm->tm_min = bcd2bin(buf[CCR_MIN]);
+ tm->tm_hour = bcd2bin(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */
+ tm->tm_mday = bcd2bin(buf[CCR_MDAY]);
+ tm->tm_mon = bcd2bin(buf[CCR_MONTH]) - 1; /* mon is 0-11 */
+ tm->tm_year = bcd2bin(buf[CCR_YEAR])
+ + (bcd2bin(buf[CCR_Y2K]) * 100) - 1900;
tm->tm_wday = buf[CCR_WDAY];
dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
@@ -174,11 +174,11 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
__func__,
tm->tm_sec, tm->tm_min, tm->tm_hour);
- buf[CCR_SEC] = BIN2BCD(tm->tm_sec);
- buf[CCR_MIN] = BIN2BCD(tm->tm_min);
+ buf[CCR_SEC] = bin2bcd(tm->tm_sec);
+ buf[CCR_MIN] = bin2bcd(tm->tm_min);
/* set hour and 24hr bit */
- buf[CCR_HOUR] = BIN2BCD(tm->tm_hour) | X1205_HR_MIL;
+ buf[CCR_HOUR] = bin2bcd(tm->tm_hour) | X1205_HR_MIL;
/* should we also set the date? */
if (datetoo) {
@@ -187,15 +187,15 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
__func__,
tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
- buf[CCR_MDAY] = BIN2BCD(tm->tm_mday);
+ buf[CCR_MDAY] = bin2bcd(tm->tm_mday);
/* month, 1 - 12 */
- buf[CCR_MONTH] = BIN2BCD(tm->tm_mon + 1);
+ buf[CCR_MONTH] = bin2bcd(tm->tm_mon + 1);
/* year, since the rtc epoch*/
- buf[CCR_YEAR] = BIN2BCD(tm->tm_year % 100);
+ buf[CCR_YEAR] = bin2bcd(tm->tm_year % 100);
buf[CCR_WDAY] = tm->tm_wday & 0x07;
- buf[CCR_Y2K] = BIN2BCD(tm->tm_year / 100);
+ buf[CCR_Y2K] = bin2bcd(tm->tm_year / 100);
}
/* If writing alarm registers, set compare bits on registers 0-4 */
@@ -437,7 +437,7 @@ static int x1205_validate_client(struct i2c_client *client)
return -EIO;
}
- value = BCD2BIN(reg & probe_limits_pattern[i].mask);
+ value = bcd2bin(reg & probe_limits_pattern[i].mask);
if (value > probe_limits_pattern[i].max ||
value < probe_limits_pattern[i].min) {
diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c
index 69f8346aa28..5877f29a600 100644
--- a/drivers/scsi/arcmsr/arcmsr_attr.c
+++ b/drivers/scsi/arcmsr/arcmsr_attr.c
@@ -189,7 +189,6 @@ static struct bin_attribute arcmsr_sysfs_message_read_attr = {
.attr = {
.name = "mu_read",
.mode = S_IRUSR ,
- .owner = THIS_MODULE,
},
.size = 1032,
.read = arcmsr_sysfs_iop_message_read,
@@ -199,7 +198,6 @@ static struct bin_attribute arcmsr_sysfs_message_write_attr = {
.attr = {
.name = "mu_write",
.mode = S_IWUSR,
- .owner = THIS_MODULE,
},
.size = 1032,
.write = arcmsr_sysfs_iop_message_write,
@@ -209,7 +207,6 @@ static struct bin_attribute arcmsr_sysfs_message_clear_attr = {
.attr = {
.name = "mu_clear",
.mode = S_IWUSR,
- .owner = THIS_MODULE,
},
.size = 1,
.write = arcmsr_sysfs_iop_message_clear,
diff --git a/drivers/scsi/sr_vendor.c b/drivers/scsi/sr_vendor.c
index 4eb3da996b3..4ad3e017213 100644
--- a/drivers/scsi/sr_vendor.c
+++ b/drivers/scsi/sr_vendor.c
@@ -223,9 +223,9 @@ int sr_cd_check(struct cdrom_device_info *cdi)
no_multi = 1;
break;
}
- min = BCD2BIN(buffer[15]);
- sec = BCD2BIN(buffer[16]);
- frame = BCD2BIN(buffer[17]);
+ min = bcd2bin(buffer[15]);
+ sec = bcd2bin(buffer[16]);
+ frame = bcd2bin(buffer[17]);
sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;
break;
}
@@ -252,9 +252,9 @@ int sr_cd_check(struct cdrom_device_info *cdi)
}
if (rc != 0)
break;
- min = BCD2BIN(buffer[1]);
- sec = BCD2BIN(buffer[2]);
- frame = BCD2BIN(buffer[3]);
+ min = bcd2bin(buffer[1]);
+ sec = bcd2bin(buffer[2]);
+ frame = bcd2bin(buffer[3]);
sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;
if (sector)
sector -= CD_MSF_OFFSET;
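
sr_cd_check() decodes BCD minute/second/frame bytes from the TOC and flattens the MSF address into a linear sector number, as in the hunks above. A standalone sketch of that arithmetic using the standard Red Book constants from <linux/cdrom.h>:

#include <stdio.h>

#define CD_SECS        60   /* seconds per minute */
#define CD_FRAMES      75   /* frames per second */
#define CD_MSF_OFFSET 150   /* MSF numbering starts two seconds in */

static unsigned int bcd2bin(unsigned char val)
{
        return (val & 0x0f) + (val >> 4) * 10;
}

int main(void)
{
        /* e.g. a TOC entry of 03:02:01 stored as BCD bytes */
        int min = bcd2bin(0x03), sec = bcd2bin(0x02), frame = bcd2bin(0x01);
        int sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;

        sector -= CD_MSF_OFFSET;        /* MSF address -> LBA */
        printf("LBA = %d\n", sector);   /* 13500 + 150 + 1 - 150 = 13501 */
        return 0;
}
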
diff --git a/drivers/serial/8250_gsc.c b/drivers/serial/8250_gsc.c
index 0416ad3bc12..418b4fe9a0a 100644
--- a/drivers/serial/8250_gsc.c
+++ b/drivers/serial/8250_gsc.c
@@ -111,7 +111,7 @@ static struct parisc_driver serial_driver = {
.probe = serial_init_chip,
};
-int __init probe_serial_gsc(void)
+static int __init probe_serial_gsc(void)
{
register_parisc_driver(&lasi_driver);
register_parisc_driver(&serial_driver);
diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c
index 8fcb4c5b9a2..7313c2edcb8 100644
--- a/drivers/serial/serial_txx9.c
+++ b/drivers/serial/serial_txx9.c
@@ -1039,7 +1039,7 @@ static int __devinit serial_txx9_probe(struct platform_device *dev)
ret = serial_txx9_register_port(&port);
if (ret < 0) {
dev_err(&dev->dev, "unable to register port at index %d "
- "(IO%x MEM%llx IRQ%d): %d\n", i,
+ "(IO%lx MEM%llx IRQ%d): %d\n", i,
p->iobase, (unsigned long long)p->mapbase,
p->irq, ret);
}
diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c
index b73e3c0056c..d5276c012f7 100644
--- a/drivers/serial/sn_console.c
+++ b/drivers/serial/sn_console.c
@@ -61,7 +61,7 @@
#define SN_SAL_BUFFER_SIZE (64 * (1 << 10))
#define SN_SAL_UART_FIFO_DEPTH 16
-#define SN_SAL_UART_FIFO_SPEED_CPS 9600/10
+#define SN_SAL_UART_FIFO_SPEED_CPS (9600/10)
/* sn_transmit_chars() calling args */
#define TRANSMIT_BUFFERED 0
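
The one-line sn_console.c change adds parentheses around the macro body. Without them, embedding the macro in a larger expression silently regroups the arithmetic; a hypothetical standalone demonstration:

#include <stdio.h>

#define FIFO_SPEED_CPS_BAD   9600/10    /* unparenthesized */
#define FIFO_SPEED_CPS_GOOD (9600/10)

int main(void)
{
        /* microseconds to drain at the FIFO rate, macro as a divisor */
        printf("%d\n", 1000000 / FIFO_SPEED_CPS_BAD);   /* 1000000/9600/10 = 10 */
        printf("%d\n", 1000000 / FIFO_SPEED_CPS_GOOD);  /* 1000000/960 = 1041 */
        return 0;
}
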
diff --git a/drivers/staging/go7007/Kconfig b/drivers/staging/go7007/Kconfig
index 57a121c338c..593fdb767aa 100644
--- a/drivers/staging/go7007/Kconfig
+++ b/drivers/staging/go7007/Kconfig
@@ -1,10 +1,12 @@
config VIDEO_GO7007
tristate "Go 7007 support"
depends on VIDEO_DEV && PCI && I2C && INPUT
+ depends on SND
select VIDEOBUF_DMA_SG
select VIDEO_IR
select VIDEO_TUNER
select VIDEO_TVEEPROM
+ select SND_PCM
select CRC32
default n
---help---
diff --git a/drivers/staging/sxg/Kconfig b/drivers/staging/sxg/Kconfig
index 1ae35080660..6e6cf0b9ef9 100644
--- a/drivers/staging/sxg/Kconfig
+++ b/drivers/staging/sxg/Kconfig
@@ -1,6 +1,7 @@
config SXG
tristate "Alacritech SLIC Technology Non-Accelerated 10Gbe support"
depends on PCI && NETDEV_10000
+ depends on X86
default n
help
This driver supports the Alacritech SLIC Technology Non-Accelerated
diff --git a/drivers/telephony/phonedev.c b/drivers/telephony/phonedev.c
index 4d74ba36c3a..37caf4d6903 100644
--- a/drivers/telephony/phonedev.c
+++ b/drivers/telephony/phonedev.c
@@ -54,7 +54,6 @@ static int phone_open(struct inode *inode, struct file *file)
if (minor >= PHONE_NUM_DEVICES)
return -ENODEV;
- lock_kernel();
mutex_lock(&phone_lock);
p = phone_device[minor];
if (p)
@@ -81,7 +80,6 @@ static int phone_open(struct inode *inode, struct file *file)
fops_put(old_fops);
end:
mutex_unlock(&phone_lock);
- unlock_kernel();
return err;
}
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 217c5118ae9..cd5f20da738 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1002,101 +1002,132 @@ fb_blank(struct fb_info *info, int blank)
return ret;
}
-static int
-fb_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+static long
+fb_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
+ struct inode *inode = file->f_path.dentry->d_inode;
int fbidx = iminor(inode);
- struct fb_info *info = registered_fb[fbidx];
- struct fb_ops *fb = info->fbops;
+ struct fb_info *info;
+ struct fb_ops *fb;
struct fb_var_screeninfo var;
struct fb_fix_screeninfo fix;
struct fb_con2fbmap con2fb;
struct fb_cmap_user cmap;
struct fb_event event;
void __user *argp = (void __user *)arg;
- int i;
-
- if (!fb)
+ long ret = 0;
+
+ info = registered_fb[fbidx];
+ mutex_lock(&info->lock);
+ fb = info->fbops;
+
+ if (!fb) {
+ mutex_unlock(&info->lock);
return -ENODEV;
+ }
switch (cmd) {
case FBIOGET_VSCREENINFO:
- return copy_to_user(argp, &info->var,
+ ret = copy_to_user(argp, &info->var,
sizeof(var)) ? -EFAULT : 0;
+ break;
case FBIOPUT_VSCREENINFO:
- if (copy_from_user(&var, argp, sizeof(var)))
- return -EFAULT;
+ if (copy_from_user(&var, argp, sizeof(var))) {
+ ret = -EFAULT;
+ break;
+ }
acquire_console_sem();
info->flags |= FBINFO_MISC_USEREVENT;
- i = fb_set_var(info, &var);
+ ret = fb_set_var(info, &var);
info->flags &= ~FBINFO_MISC_USEREVENT;
release_console_sem();
- if (i) return i;
- if (copy_to_user(argp, &var, sizeof(var)))
- return -EFAULT;
- return 0;
+ if (ret == 0 && copy_to_user(argp, &var, sizeof(var)))
+ ret = -EFAULT;
+ break;
case FBIOGET_FSCREENINFO:
- return copy_to_user(argp, &info->fix,
+ ret = copy_to_user(argp, &info->fix,
sizeof(fix)) ? -EFAULT : 0;
+ break;
case FBIOPUTCMAP:
if (copy_from_user(&cmap, argp, sizeof(cmap)))
- return -EFAULT;
- return (fb_set_user_cmap(&cmap, info));
+ ret = -EFAULT;
+ else
+ ret = fb_set_user_cmap(&cmap, info);
+ break;
case FBIOGETCMAP:
if (copy_from_user(&cmap, argp, sizeof(cmap)))
- return -EFAULT;
- return fb_cmap_to_user(&info->cmap, &cmap);
+ ret = -EFAULT;
+ else
+ ret = fb_cmap_to_user(&info->cmap, &cmap);
+ break;
case FBIOPAN_DISPLAY:
- if (copy_from_user(&var, argp, sizeof(var)))
- return -EFAULT;
+ if (copy_from_user(&var, argp, sizeof(var))) {
+ ret = -EFAULT;
+ break;
+ }
acquire_console_sem();
- i = fb_pan_display(info, &var);
+ ret = fb_pan_display(info, &var);
release_console_sem();
- if (i)
- return i;
- if (copy_to_user(argp, &var, sizeof(var)))
- return -EFAULT;
- return 0;
+ if (ret == 0 && copy_to_user(argp, &var, sizeof(var)))
+ ret = -EFAULT;
+ break;
case FBIO_CURSOR:
- return -EINVAL;
+ ret = -EINVAL;
+ break;
case FBIOGET_CON2FBMAP:
if (copy_from_user(&con2fb, argp, sizeof(con2fb)))
- return -EFAULT;
- if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
- return -EINVAL;
- con2fb.framebuffer = -1;
- event.info = info;
- event.data = &con2fb;
- fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event);
- return copy_to_user(argp, &con2fb,
+ ret = -EFAULT;
+ else if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
+ ret = -EINVAL;
+ else {
+ con2fb.framebuffer = -1;
+ event.info = info;
+ event.data = &con2fb;
+ fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP,
+ &event);
+ ret = copy_to_user(argp, &con2fb,
sizeof(con2fb)) ? -EFAULT : 0;
+ }
+ break;
case FBIOPUT_CON2FBMAP:
- if (copy_from_user(&con2fb, argp, sizeof(con2fb)))
- return - EFAULT;
- if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
- return -EINVAL;
- if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX)
- return -EINVAL;
- if (!registered_fb[con2fb.framebuffer])
- request_module("fb%d", con2fb.framebuffer);
+ if (copy_from_user(&con2fb, argp, sizeof(con2fb))) {
+ ret = -EFAULT;
+ break;
+ }
+ if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) {
+ ret = -EINVAL;
+ break;
+ }
+ if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) {
+ ret = -EINVAL;
+ break;
+ }
if (!registered_fb[con2fb.framebuffer])
- return -EINVAL;
+ request_module("fb%d", con2fb.framebuffer);
+ if (!registered_fb[con2fb.framebuffer]) {
+ ret = -EINVAL;
+ break;
+ }
event.info = info;
event.data = &con2fb;
- return fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP,
+ ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP,
&event);
+ break;
case FBIOBLANK:
acquire_console_sem();
info->flags |= FBINFO_MISC_USEREVENT;
- i = fb_blank(info, arg);
+ ret = fb_blank(info, arg);
info->flags &= ~FBINFO_MISC_USEREVENT;
release_console_sem();
- return i;
+ break;
default:
if (fb->fb_ioctl == NULL)
- return -EINVAL;
- return fb->fb_ioctl(info, cmd, arg);
+ ret = -ENOTTY;
+ else
+ ret = fb->fb_ioctl(info, cmd, arg);
}
+ mutex_unlock(&info->lock);
+ return ret;
}
#ifdef CONFIG_COMPAT
@@ -1150,7 +1181,7 @@ static int fb_getput_cmap(struct inode *inode, struct file *file,
put_user(compat_ptr(data), &cmap->transp))
return -EFAULT;
- err = fb_ioctl(inode, file, cmd, (unsigned long) cmap);
+ err = fb_ioctl(file, cmd, (unsigned long) cmap);
if (!err) {
if (copy_in_user(&cmap32->start,
@@ -1204,7 +1235,7 @@ static int fb_get_fscreeninfo(struct inode *inode, struct file *file,
old_fs = get_fs();
set_fs(KERNEL_DS);
- err = fb_ioctl(inode, file, cmd, (unsigned long) &fix);
+ err = fb_ioctl(file, cmd, (unsigned long) &fix);
set_fs(old_fs);
if (!err)
@@ -1222,7 +1253,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct fb_ops *fb = info->fbops;
long ret = -ENOIOCTLCMD;
- lock_kernel();
+ mutex_lock(&info->lock);
switch(cmd) {
case FBIOGET_VSCREENINFO:
case FBIOPUT_VSCREENINFO:
@@ -1231,7 +1262,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case FBIOPUT_CON2FBMAP:
arg = (unsigned long) compat_ptr(arg);
case FBIOBLANK:
- ret = fb_ioctl(inode, file, cmd, arg);
+ ret = fb_ioctl(file, cmd, arg);
break;
case FBIOGET_FSCREENINFO:
@@ -1248,7 +1279,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
ret = fb->fb_compat_ioctl(info, cmd, arg);
break;
}
- unlock_kernel();
+ mutex_unlock(&info->lock);
return ret;
}
#endif
@@ -1270,13 +1301,13 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
return -ENODEV;
if (fb->fb_mmap) {
int res;
- lock_kernel();
+ mutex_lock(&info->lock);
res = fb->fb_mmap(info, vma);
- unlock_kernel();
+ mutex_unlock(&info->lock);
return res;
}
- lock_kernel();
+ mutex_lock(&info->lock);
/* frame buffer memory */
start = info->fix.smem_start;
@@ -1285,13 +1316,13 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
/* memory mapped io */
off -= len;
if (info->var.accel_flags) {
- unlock_kernel();
+ mutex_unlock(&info->lock);
return -EINVAL;
}
start = info->fix.mmio_start;
len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.mmio_len);
}
- unlock_kernel();
+ mutex_unlock(&info->lock);
start &= PAGE_MASK;
if ((vma->vm_end - vma->vm_start + off) > len)
return -EINVAL;
@@ -1315,13 +1346,13 @@ fb_open(struct inode *inode, struct file *file)
if (fbidx >= FB_MAX)
return -ENODEV;
- lock_kernel();
- if (!(info = registered_fb[fbidx]))
+ info = registered_fb[fbidx];
+ if (!info)
request_module("fb%d", fbidx);
- if (!(info = registered_fb[fbidx])) {
- res = -ENODEV;
- goto out;
- }
+ info = registered_fb[fbidx];
+ if (!info)
+ return -ENODEV;
+ mutex_lock(&info->lock);
if (!try_module_get(info->fbops->owner)) {
res = -ENODEV;
goto out;
@@ -1337,7 +1368,7 @@ fb_open(struct inode *inode, struct file *file)
fb_deferred_io_open(info, inode, file);
#endif
out:
- unlock_kernel();
+ mutex_unlock(&info->lock);
return res;
}
@@ -1346,11 +1377,11 @@ fb_release(struct inode *inode, struct file *file)
{
struct fb_info * const info = file->private_data;
- lock_kernel();
+ mutex_lock(&info->lock);
if (info->fbops->fb_release)
info->fbops->fb_release(info,1);
module_put(info->fbops->owner);
- unlock_kernel();
+ mutex_unlock(&info->lock);
return 0;
}
@@ -1358,7 +1389,7 @@ static const struct file_operations fb_fops = {
.owner = THIS_MODULE,
.read = fb_read,
.write = fb_write,
- .ioctl = fb_ioctl,
+ .unlocked_ioctl = fb_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = fb_compat_ioctl,
#endif
@@ -1429,6 +1460,7 @@ register_framebuffer(struct fb_info *fb_info)
if (!registered_fb[i])
break;
fb_info->node = i;
+ mutex_init(&fb_info->lock);
fb_info->dev = device_create(fb_class, fb_info->device,
MKDEV(FB_MAJOR, i), NULL, "fb%d", i);
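
The fbmem.c rework above retires the inode-taking .ioctl entry point (and the big kernel lock) in favour of .unlocked_ioctl serialized by a per-fb_info mutex, initialized in register_framebuffer(). A minimal sketch of the general pattern, with hypothetical names (mydev, mydev_ioctl) and module boilerplate omitted:

#include <linux/fs.h>
#include <linux/mutex.h>

struct mydev {
        struct mutex lock;      /* mutex_init() this at register time */
        /* ... device state ... */
};

static long mydev_ioctl(struct file *file, unsigned int cmd,
                        unsigned long arg)
{
        struct mydev *dev = file->private_data;
        long ret;

        mutex_lock(&dev->lock);         /* replaces lock_kernel() */
        switch (cmd) {
        default:
                ret = -ENOTTY;          /* unknown ioctl: ENOTTY, not EINVAL */
        }
        mutex_unlock(&dev->lock);
        return ret;
}

static const struct file_operations mydev_fops = {
        .owner          = THIS_MODULE,
        .unlocked_ioctl = mydev_ioctl,  /* returns long, no inode argument */
};
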
diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c
index ed6b0576208..1f09d4e4144 100644
--- a/drivers/w1/slaves/w1_ds2760.c
+++ b/drivers/w1/slaves/w1_ds2760.c
@@ -80,7 +80,6 @@ static struct bin_attribute w1_ds2760_bin_attr = {
.attr = {
.name = "w1_slave",
.mode = S_IRUGO,
- .owner = THIS_MODULE,
},
.size = DS2760_DATA_SIZE,
.read = w1_ds2760_read_bin,
diff --git a/fs/Kconfig b/fs/Kconfig
index d0a1174fb51..4eca61c201f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1168,195 +1168,7 @@ config EFS_FS
To compile the EFS file system support as a module, choose M here: the
module will be called efs.
-config JFFS2_FS
- tristate "Journalling Flash File System v2 (JFFS2) support"
- select CRC32
- depends on MTD
- help
- JFFS2 is the second generation of the Journalling Flash File System
- for use on diskless embedded devices. It provides improved wear
- levelling, compression and support for hard links. You cannot use
- this on normal block devices, only on 'MTD' devices.
-
- Further information on the design and implementation of JFFS2 is
- available at <http://sources.redhat.com/jffs2/>.
-
-config JFFS2_FS_DEBUG
- int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
- depends on JFFS2_FS
- default "0"
- help
- This controls the amount of debugging messages produced by the JFFS2
- code. Set it to zero for use in production systems. For evaluation,
- testing and debugging, it's advisable to set it to one. This will
- enable a few assertions and will print debugging messages at the
- KERN_DEBUG loglevel, where they won't normally be visible. Level 2
- is unlikely to be useful - it enables extra debugging in certain
- areas which at one point needed debugging, but when the bugs were
- located and fixed, the detailed messages were relegated to level 2.
-
- If reporting bugs, please try to have available a full dump of the
- messages at debug level 1 while the misbehaviour was occurring.
-
-config JFFS2_FS_WRITEBUFFER
- bool "JFFS2 write-buffering support"
- depends on JFFS2_FS
- default y
- help
- This enables the write-buffering support in JFFS2.
-
- This functionality is required to support JFFS2 on the following
- types of flash devices:
- - NAND flash
- - NOR flash with transparent ECC
- - DataFlash
-
-config JFFS2_FS_WBUF_VERIFY
- bool "Verify JFFS2 write-buffer reads"
- depends on JFFS2_FS_WRITEBUFFER
- default n
- help
- This causes JFFS2 to read back every page written through the
- write-buffer, and check for errors.
-
-config JFFS2_SUMMARY
- bool "JFFS2 summary support (EXPERIMENTAL)"
- depends on JFFS2_FS && EXPERIMENTAL
- default n
- help
- This feature makes it possible to use summary information
- for faster filesystem mount.
-
- The summary information can be inserted into a filesystem image
- by the utility 'sumtool'.
-
- If unsure, say 'N'.
-
-config JFFS2_FS_XATTR
- bool "JFFS2 XATTR support (EXPERIMENTAL)"
- depends on JFFS2_FS && EXPERIMENTAL
- default n
- help
- Extended attributes are name:value pairs associated with inodes by
- the kernel or by users (see the attr(5) manual page, or visit
- <http://acl.bestbits.at/> for details).
-
- If unsure, say N.
-
-config JFFS2_FS_POSIX_ACL
- bool "JFFS2 POSIX Access Control Lists"
- depends on JFFS2_FS_XATTR
- default y
- select FS_POSIX_ACL
- help
- Posix Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
-
- To learn more about Access Control Lists, visit the Posix ACLs for
- Linux website <http://acl.bestbits.at/>.
-
- If you don't know what Access Control Lists are, say N
-
-config JFFS2_FS_SECURITY
- bool "JFFS2 Security Labels"
- depends on JFFS2_FS_XATTR
- default y
- help
- Security labels support alternative access control models
- implemented by security modules like SELinux. This option
- enables an extended attribute handler for file security
- labels in the jffs2 filesystem.
-
- If you are not using a security module that requires using
- extended attributes for file security labels, say N.
-
-config JFFS2_COMPRESSION_OPTIONS
- bool "Advanced compression options for JFFS2"
- depends on JFFS2_FS
- default n
- help
- Enabling this option allows you to explicitly choose which
- compression modules, if any, are enabled in JFFS2. Removing
- compressors can mean you cannot read existing file systems,
- and enabling experimental compressors can mean that you
- write a file system which cannot be read by a standard kernel.
-
- If unsure, you should _definitely_ say 'N'.
-
-config JFFS2_ZLIB
- bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
- select ZLIB_INFLATE
- select ZLIB_DEFLATE
- depends on JFFS2_FS
- default y
- help
- Zlib is designed to be a free, general-purpose, legally unencumbered,
- lossless data-compression library for use on virtually any computer
- hardware and operating system. See <http://www.gzip.org/zlib/> for
- further information.
-
- Say 'Y' if unsure.
-
-config JFFS2_LZO
- bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
- select LZO_COMPRESS
- select LZO_DECOMPRESS
- depends on JFFS2_FS
- default n
- help
- minilzo-based compression. Generally works better than Zlib.
-
- This feature was added in July, 2007. Say 'N' if you need
- compatibility with older bootloaders or kernels.
-
-config JFFS2_RTIME
- bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
- depends on JFFS2_FS
- default y
- help
- Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
-
-config JFFS2_RUBIN
- bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
- depends on JFFS2_FS
- default n
- help
- RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
-
-choice
- prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
- default JFFS2_CMODE_PRIORITY
- depends on JFFS2_FS
- help
- You can set here the default compression mode of JFFS2 from
- the available compression modes. Don't touch if unsure.
-
-config JFFS2_CMODE_NONE
- bool "no compression"
- help
- Uses no compression.
-
-config JFFS2_CMODE_PRIORITY
- bool "priority"
- help
- Tries the compressors in a predefined order and chooses the first
- successful one.
-
-config JFFS2_CMODE_SIZE
- bool "size (EXPERIMENTAL)"
- help
- Tries all compressors and chooses the one which has the smallest
- result.
-
-config JFFS2_CMODE_FAVOURLZO
- bool "Favour LZO"
- help
- Tries all compressors and chooses the one which has the smallest
- result but gives some preference to LZO (which has faster
- decompression) at the expense of size.
-
-endchoice
-
+source "fs/jffs2/Kconfig"
# UBIFS File system configuration
source "fs/ubifs/Kconfig"
@@ -1913,148 +1725,7 @@ config SMB_NLS_REMOTE
smbmount from samba 2.2.0 or later supports this.
-config CIFS
- tristate "CIFS support (advanced network filesystem, SMBFS successor)"
- depends on INET
- select NLS
- help
- This is the client VFS module for the Common Internet File System
- (CIFS) protocol which is the successor to the Server Message Block
- (SMB) protocol, the native file sharing mechanism for most early
- PC operating systems. The CIFS protocol is fully supported by
- file servers such as Windows 2000 (including Windows 2003, NT 4
- and Windows XP) as well by Samba (which provides excellent CIFS
- server support for Linux and many other operating systems). Limited
- support for OS/2 and Windows ME and similar servers is provided as
- well.
-
- The cifs module provides an advanced network file system
- client for mounting to CIFS compliant servers. It includes
- support for DFS (hierarchical name space), secure per-user
- session establishment via Kerberos or NTLM or NTLMv2,
- safe distributed caching (oplock), optional packet
- signing, Unicode and other internationalization improvements.
- If you need to mount to Samba or Windows from this machine, say Y.
-
-config CIFS_STATS
- bool "CIFS statistics"
- depends on CIFS
- help
- Enabling this option will cause statistics for each server share
- mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
-
-config CIFS_STATS2
- bool "Extended statistics"
- depends on CIFS_STATS
- help
- Enabling this option will allow more detailed statistics on SMB
- request timing to be displayed in /proc/fs/cifs/DebugData and also
- allow optional logging of slow responses to dmesg (depending on the
- value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
- These additional statistics may have a minor effect on performance
- and memory utilization.
-
- Unless you are a developer or are doing network performance analysis
- or tuning, say N.
-
-config CIFS_WEAK_PW_HASH
- bool "Support legacy servers which use weaker LANMAN security"
- depends on CIFS
- help
- Modern CIFS servers including Samba and most Windows versions
- (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
- security mechanisms. These hash the password more securely
- than the mechanisms used in the older LANMAN version of the
- SMB protocol but LANMAN based authentication is needed to
- establish sessions with some old SMB servers.
-
- Enabling this option allows the cifs module to mount to older
- LANMAN based servers such as OS/2 and Windows 95, but such
- mounts may be less secure than mounts using NTLM or more recent
- security mechanisms if you are on a public network. Unless you
- have a need to access old SMB servers (and are on a private
- network) you probably want to say N. Even if this support
- is enabled in the kernel build, LANMAN authentication will not be
- used automatically. At runtime LANMAN mounts are disabled but
- can be set to required (or optional) either in
- /proc/fs/cifs (see fs/cifs/README for more detail) or via an
- option on the mount command. This support is disabled by
- default in order to reduce the possibility of a downgrade
- attack.
-
- If unsure, say N.
-
-config CIFS_UPCALL
- bool "Kerberos/SPNEGO advanced session setup"
- depends on CIFS && KEYS
- help
- Enables an upcall mechanism for CIFS which accesses
- userspace helper utilities to provide SPNEGO packaged (RFC 4178)
- Kerberos tickets which are needed to mount to certain secure servers
- (for which more secure Kerberos authentication is required). If
- unsure, say N.
-
-config CIFS_XATTR
- bool "CIFS extended attributes"
- depends on CIFS
- help
- Extended attributes are name:value pairs associated with inodes by
- the kernel or by users (see the attr(5) manual page, or visit
- <http://acl.bestbits.at/> for details). CIFS maps the name of
- extended attributes beginning with the user namespace prefix
- to SMB/CIFS EAs. EAs are stored on Windows servers without the
- user namespace prefix, but their names are seen by Linux cifs clients
- prefaced by the user namespace prefix. The system namespace
- (used by some filesystems to store ACLs) is not supported at
- this time.
-
- If unsure, say N.
-
-config CIFS_POSIX
- bool "CIFS POSIX Extensions"
- depends on CIFS_XATTR
- help
- Enabling this option will cause the cifs client to attempt to
- negotiate a newer dialect with servers, such as Samba 3.0.5
- or later, that optionally can handle more POSIX like (rather
- than Windows like) file behavior. It also enables
- support for POSIX ACLs (getfacl and setfacl) to servers
- (such as Samba 3.10 and later) which can negotiate
- CIFS POSIX ACL support. If unsure, say N.
-
-config CIFS_DEBUG2
- bool "Enable additional CIFS debugging routines"
- depends on CIFS
- help
- Enabling this option adds a few more debugging routines
- to the cifs code which slightly increases the size of
- the cifs module and can cause additional logging of debug
- messages in some error paths, slowing performance. This
- option can be turned off unless you are debugging
- cifs problems. If unsure, say N.
-
-config CIFS_EXPERIMENTAL
- bool "CIFS Experimental Features (EXPERIMENTAL)"
- depends on CIFS && EXPERIMENTAL
- help
- Enables cifs features under testing. These features are
- experimental and currently include DFS support and directory
- change notification ie fcntl(F_DNOTIFY), as well as the upcall
- mechanism which will be used for Kerberos session negotiation
- and uid remapping. Some of these features also may depend on
- setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
- (which is disabled by default). See the file fs/cifs/README
- for more details. If unsure, say N.
-
-config CIFS_DFS_UPCALL
- bool "DFS feature support (EXPERIMENTAL)"
- depends on CIFS_EXPERIMENTAL
- depends on KEYS
- help
- Enables an upcall mechanism for CIFS which contacts userspace
- helper utilities to provide server name resolution (host names to
- IP addresses) which is needed for implicit mounts of DFS junction
- points. If unsure, say N.
+source "fs/cifs/Kconfig"
config NCP_FS
tristate "NCP file system support (to mount NetWare volumes)"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 801db134181..ce9fb3fbfae 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -40,6 +40,28 @@ config BINFMT_ELF_FDPIC
It is also possible to run FDPIC ELF binaries on MMU Linux.
+config CORE_DUMP_DEFAULT_ELF_HEADERS
+ bool "Write ELF core dumps with partial segments"
+ default n
+ depends on BINFMT_ELF
+ help
+ ELF core dump files describe each memory mapping of the crashed
+ process, and can contain or omit the memory contents of each one.
+ The contents of an unmodified text mapping are omitted by default.
+
+ For an unmodified text mapping of an ELF object, including just
+ the first page of the file in a core dump makes it possible to
+ identify the build ID bits in the file, without paying the i/o
+ cost and disk space to dump all the text. However, versions of
+ GDB before 6.7 are confused by ELF core dump files in this format.
+
+ The core dump behavior can be controlled per process using
+ the /proc/PID/coredump_filter pseudo-file; this setting is
+ inherited. See Documentation/filesystems/proc.txt for details.
+
+ This config option changes the default setting of coredump_filter
+ seen at boot time. If unsure, say N.
+
config BINFMT_FLAT
bool "Kernel support for flat binaries"
depends on !MMU && (!FRV || BROKEN)
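
The CORE_DUMP_DEFAULT_ELF_HEADERS help text above refers to the per-process /proc/PID/coredump_filter knob. A hypothetical userspace snippet that sets bit 4 (the ELF-headers bit whose boot-time default this option changes) for the current process; error handling is elided:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/self/coredump_filter", "r+");
        unsigned int mask = 0;

        if (!f)
                return 1;
        fscanf(f, "%x", &mask);
        rewind(f);                            /* reposition before writing */
        fprintf(f, "%x", mask | (1U << 4));   /* bit 4: dump ELF headers */
        fclose(f);
        printf("coredump_filter now 0x%x\n", mask | (1U << 4));
        return 0;
}
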
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c76afa26edf..e2159063198 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1156,16 +1156,24 @@ static int dump_seek(struct file *file, loff_t off)
static unsigned long vma_dump_size(struct vm_area_struct *vma,
unsigned long mm_flags)
{
+#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
+
/* The vma can be set up to tell us the answer directly. */
if (vma->vm_flags & VM_ALWAYSDUMP)
goto whole;
+ /* Hugetlb memory check */
+ if (vma->vm_flags & VM_HUGETLB) {
+ if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
+ goto whole;
+ if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
+ goto whole;
+ }
+
/* Do not dump I/O mapped devices or special mappings */
if (vma->vm_flags & (VM_IO | VM_RESERVED))
return 0;
-#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
-
/* By default, dump shared memory if mapped from an anonymous file. */
if (vma->vm_flags & VM_SHARED) {
if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
diff --git a/fs/buffer.c b/fs/buffer.c
index ac78d4c19b3..6569fda5cfe 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -76,8 +76,7 @@ EXPORT_SYMBOL(__lock_buffer);
void unlock_buffer(struct buffer_head *bh)
{
- smp_mb__before_clear_bit();
- clear_buffer_locked(bh);
+ clear_bit_unlock(BH_Lock, &bh->b_state);
smp_mb__after_clear_bit();
wake_up_bit(&bh->b_state, BH_Lock);
}
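
The unlock_buffer() change folds the release barrier into the bit clear: clear_bit_unlock() guarantees that writes made while the buffer was held are visible before BH_Lock appears clear to another CPU. A rough userspace analogue in C11 atomics, assuming the usual lock-bit protocol:

#include <stdatomic.h>
#include <stdio.h>

#define LOCK_BIT 0x1UL

static _Atomic unsigned long state;

static void unlock_sketch(void)
{
        /* clear the lock bit with release semantics, so everything
         * written under the lock is published before the bit drops */
        atomic_fetch_and_explicit(&state, ~LOCK_BIT, memory_order_release);
}

int main(void)
{
        atomic_fetch_or_explicit(&state, LOCK_BIT, memory_order_acquire);
        unlock_sketch();
        printf("state = %lu\n", atomic_load(&state));
        return 0;
}
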
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
new file mode 100644
index 00000000000..341a98965bd
--- /dev/null
+++ b/fs/cifs/Kconfig
@@ -0,0 +1,142 @@
+config CIFS
+ tristate "CIFS support (advanced network filesystem, SMBFS successor)"
+ depends on INET
+ select NLS
+ help
+ This is the client VFS module for the Common Internet File System
+ (CIFS) protocol which is the successor to the Server Message Block
+ (SMB) protocol, the native file sharing mechanism for most early
+ PC operating systems. The CIFS protocol is fully supported by
+ file servers such as Windows 2000 (including Windows 2003, NT 4
+ and Windows XP) as well as by Samba (which provides excellent CIFS
+ server support for Linux and many other operating systems). Limited
+ support for OS/2 and Windows ME and similar servers is provided as
+ well.
+
+ The cifs module provides an advanced network file system
+ client for mounting to CIFS compliant servers. It includes
+ support for DFS (hierarchical name space), secure per-user
+ session establishment via Kerberos or NTLM or NTLMv2,
+ safe distributed caching (oplock), optional packet
+ signing, Unicode and other internationalization improvements.
+ If you need to mount to Samba or Windows from this machine, say Y.
+
+config CIFS_STATS
+ bool "CIFS statistics"
+ depends on CIFS
+ help
+ Enabling this option will cause statistics for each server share
+ mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
+
+config CIFS_STATS2
+ bool "Extended statistics"
+ depends on CIFS_STATS
+ help
+ Enabling this option will allow more detailed statistics on SMB
+ request timing to be displayed in /proc/fs/cifs/DebugData and also
+ allow optional logging of slow responses to dmesg (depending on the
+ value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
+ These additional statistics may have a minor effect on performance
+ and memory utilization.
+
+ Unless you are a developer or are doing network performance analysis
+ or tuning, say N.
+
+config CIFS_WEAK_PW_HASH
+ bool "Support legacy servers which use weaker LANMAN security"
+ depends on CIFS
+ help
+ Modern CIFS servers including Samba and most Windows versions
+ (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
+ security mechanisms. These hash the password more securely
+ than the mechanisms used in the older LANMAN version of the
+ SMB protocol but LANMAN based authentication is needed to
+ establish sessions with some old SMB servers.
+
+ Enabling this option allows the cifs module to mount to older
+ LANMAN based servers such as OS/2 and Windows 95, but such
+ mounts may be less secure than mounts using NTLM or more recent
+ security mechanisms if you are on a public network. Unless you
+ have a need to access old SMB servers (and are on a private
+ network) you probably want to say N. Even if this support
+ is enabled in the kernel build, LANMAN authentication will not be
+ used automatically. At runtime LANMAN mounts are disabled but
+ can be set to required (or optional) either in
+ /proc/fs/cifs (see fs/cifs/README for more detail) or via an
+ option on the mount command. This support is disabled by
+ default in order to reduce the possibility of a downgrade
+ attack.
+
+ If unsure, say N.
+
+config CIFS_UPCALL
+ bool "Kerberos/SPNEGO advanced session setup"
+ depends on CIFS && KEYS
+ help
+ Enables an upcall mechanism for CIFS which accesses
+ userspace helper utilities to provide SPNEGO packaged (RFC 4178)
+ Kerberos tickets which are needed to mount to certain secure servers
+ (for which more secure Kerberos authentication is required). If
+ unsure, say N.
+
+config CIFS_XATTR
+ bool "CIFS extended attributes"
+ depends on CIFS
+ help
+ Extended attributes are name:value pairs associated with inodes by
+ the kernel or by users (see the attr(5) manual page, or visit
+ <http://acl.bestbits.at/> for details). CIFS maps the name of
+ extended attributes beginning with the user namespace prefix
+ to SMB/CIFS EAs. EAs are stored on Windows servers without the
+ user namespace prefix, but their names are seen by Linux cifs clients
+ prefaced by the user namespace prefix. The system namespace
+ (used by some filesystems to store ACLs) is not supported at
+ this time.
+
+ If unsure, say N.
+
+config CIFS_POSIX
+ bool "CIFS POSIX Extensions"
+ depends on CIFS_XATTR
+ help
+ Enabling this option will cause the cifs client to attempt to
+ negotiate a newer dialect with servers, such as Samba 3.0.5
+ or later, that optionally can handle more POSIX like (rather
+ than Windows like) file behavior. It also enables
+ support for POSIX ACLs (getfacl and setfacl) to servers
+ (such as Samba 3.10 and later) which can negotiate
+ CIFS POSIX ACL support. If unsure, say N.
+
+config CIFS_DEBUG2
+ bool "Enable additional CIFS debugging routines"
+ depends on CIFS
+ help
+ Enabling this option adds a few more debugging routines
+ to the cifs code which slightly increases the size of
+ the cifs module and can cause additional logging of debug
+ messages in some error paths, slowing performance. This
+ option can be turned off unless you are debugging
+ cifs problems. If unsure, say N.
+
+config CIFS_EXPERIMENTAL
+ bool "CIFS Experimental Features (EXPERIMENTAL)"
+ depends on CIFS && EXPERIMENTAL
+ help
+ Enables cifs features under testing. These features are
+ experimental and currently include DFS support and directory
+ change notification, i.e. fcntl(F_DNOTIFY), as well as the upcall
+ mechanism which will be used for Kerberos session negotiation
+ and uid remapping. Some of these features also may depend on
+ setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
+ (which is disabled by default). See the file fs/cifs/README
+ for more details. If unsure, say N.
+
+config CIFS_DFS_UPCALL
+ bool "DFS feature support (EXPERIMENTAL)"
+ depends on CIFS_EXPERIMENTAL
+ depends on KEYS
+ help
+ Enables an upcall mechanism for CIFS which contacts userspace
+ helper utilities to provide server name resolution (host names to
+ IP addresses) which is needed for implicit mounts of DFS junction
+ points. If unsure, say N.
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c4a8a060512..62d8bd8f14c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1791,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
SetPageUptodate(page);
unlock_page(page);
if (!pagevec_add(plru_pvec, page))
- __pagevec_lru_add(plru_pvec);
+ __pagevec_lru_add_file(plru_pvec);
data += PAGE_CACHE_SIZE;
}
return;
@@ -1925,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
bytes_read = 0;
}
- pagevec_lru_add(&lru_pvec);
+ pagevec_lru_add_file(&lru_pvec);
/* need to free smb_read_data buf before exit */
if (smb_read_data) {
diff --git a/fs/exec.c b/fs/exec.c
index a41e7902ed0..4e834f16d9d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1386,7 +1386,7 @@ EXPORT_SYMBOL(set_binfmt);
* name into corename, which must have space for at least
* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
*/
-static int format_corename(char *corename, int nr_threads, long signr)
+static int format_corename(char *corename, long signr)
{
const char *pat_ptr = core_pattern;
int ispipe = (*pat_ptr == '|');
@@ -1493,8 +1493,7 @@ static int format_corename(char *corename, int nr_threads, long signr)
* If core_pattern does not include a %p (as is the default)
* and core_uses_pid is set, then .%pid will be appended to
* the filename. Do not do this for piped commands. */
- if (!ispipe && !pid_in_pattern
- && (core_uses_pid || nr_threads)) {
+ if (!ispipe && !pid_in_pattern && core_uses_pid) {
rc = snprintf(out_ptr, out_end - out_ptr,
".%d", task_tgid_vnr(current));
if (rc > out_end - out_ptr)
@@ -1757,7 +1756,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
* uses lock_kernel()
*/
lock_kernel();
- ispipe = format_corename(corename, retval, signr);
+ ispipe = format_corename(corename, signr);
unlock_kernel();
/*
* Don't bother to check the RLIMIT_CORE value if core_pattern points
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 92fd0338a6e..f5b57a2ca35 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1547,6 +1547,7 @@ retry_alloc:
* turn off reservation for this allocation
*/
if (my_rsv && (free_blocks < windowsz)
+ && (free_blocks > 0)
&& (rsv_is_empty(&my_rsv->rsv_window)))
my_rsv = NULL;
@@ -1585,7 +1586,7 @@ retry_alloc:
* free blocks is less than half of the reservation
* window size.
*/
- if (free_blocks <= (windowsz/2))
+ if (my_rsv && (free_blocks <= (windowsz/2)))
continue;
brelse(bitmap_bh);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 2eea96ec78e..4c82531ea0a 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -102,6 +102,7 @@ static int ext3_readdir(struct file * filp,
int err;
struct inode *inode = filp->f_path.dentry->d_inode;
int ret = 0;
+ int dir_has_error = 0;
sb = inode->i_sb;
@@ -148,9 +149,12 @@ static int ext3_readdir(struct file * filp,
* of recovering data when there's a bad sector
*/
if (!bh) {
- ext3_error (sb, "ext3_readdir",
- "directory #%lu contains a hole at offset %lu",
- inode->i_ino, (unsigned long)filp->f_pos);
+ if (!dir_has_error) {
+ ext3_error(sb, __func__, "directory #%lu "
+ "contains a hole at offset %lld",
+ inode->i_ino, filp->f_pos);
+ dir_has_error = 1;
+ }
/* corrupt size? Maybe no more blocks to read */
if (filp->f_pos > inode->i_blocks << 9)
break;
@@ -410,7 +414,7 @@ static int call_filldir(struct file * filp, void * dirent,
get_dtype(sb, fname->file_type));
if (error) {
filp->f_pos = curr_pos;
- info->extra_fname = fname->next;
+ info->extra_fname = fname;
return error;
}
fname = fname->next;
@@ -449,11 +453,21 @@ static int ext3_dx_readdir(struct file * filp,
* If there are any leftover names on the hash collision
* chain, return them first.
*/
- if (info->extra_fname &&
- call_filldir(filp, dirent, filldir, info->extra_fname))
- goto finished;
+ if (info->extra_fname) {
+ if (call_filldir(filp, dirent, filldir, info->extra_fname))
+ goto finished;
- if (!info->curr_node)
+ info->extra_fname = NULL;
+ info->curr_node = rb_next(info->curr_node);
+ if (!info->curr_node) {
+ if (info->next_hash == ~0) {
+ filp->f_pos = EXT3_HTREE_EOF;
+ goto finished;
+ }
+ info->curr_hash = info->next_hash;
+ info->curr_minor_hash = 0;
+ }
+ } else if (!info->curr_node)
info->curr_node = rb_first(&info->root);
while (1) {
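
The call_filldir() change above fixes a resume-point bug: when the filldir callback rejects an entry, the walk must later retry from that same entry, not its successor, or one directory entry is silently skipped per error. A toy standalone model of the fixed loop (names here are hypothetical, not ext3's):

#include <stdio.h>

struct fname { struct fname *next; const char *name; };

static int emit_fail_once(const char *name)
{
        static int calls;
        printf("emit %s -> %s\n", name, calls ? "ok" : "error");
        return calls++ ? 0 : -1;        /* fail only the first delivery */
}

static struct fname *deliver(struct fname *f, int (*emit)(const char *))
{
        while (f) {
                if (emit(f->name))
                        return f;       /* fixed: resume here, was f->next */
                f = f->next;
        }
        return NULL;                    /* all entries delivered */
}

int main(void)
{
        struct fname b = { NULL, "b" }, a = { &b, "a" };
        struct fname *resume = deliver(&a, emit_fail_once);

        /* with the old "f->next" bug, "a" would never be emitted */
        deliver(resume, emit_fail_once);
        return 0;
}
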
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ebfec4d0148..f8424ad8997 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1186,6 +1186,13 @@ write_begin_failed:
ext3_journal_stop(handle);
unlock_page(page);
page_cache_release(page);
+ /*
+ * block_write_begin may have instantiated a few blocks
+ * outside i_size. Trim these off again. Don't need
+ * i_size_read because we hold i_mutex.
+ */
+ if (pos + len > inode->i_size)
+ vmtruncate(inode, inode->i_size);
}
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 77278e947e9..78fdf383637 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -790,7 +790,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
if (reserved_gdb || gdb_off == 0) {
if (!EXT3_HAS_COMPAT_FEATURE(sb,
- EXT3_FEATURE_COMPAT_RESIZE_INODE)){
+ EXT3_FEATURE_COMPAT_RESIZE_INODE)
+ || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
ext3_warning(sb, __func__,
"No reserved GDT blocks, can't resize");
return -EPERM;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 399a96a6c55..3a260af5544 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -625,6 +625,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
seq_puts(seq, ",data=writeback");
+ if (test_opt(sb, DATA_ERR_ABORT))
+ seq_puts(seq, ",data_err=abort");
+
ext3_show_quota_options(seq, sb);
return 0;
@@ -754,6 +757,7 @@ enum {
Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_data_err_abort, Opt_data_err_ignore,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
@@ -796,6 +800,8 @@ static const match_table_t tokens = {
{Opt_data_journal, "data=journal"},
{Opt_data_ordered, "data=ordered"},
{Opt_data_writeback, "data=writeback"},
+ {Opt_data_err_abort, "data_err=abort"},
+ {Opt_data_err_ignore, "data_err=ignore"},
{Opt_offusrjquota, "usrjquota="},
{Opt_usrjquota, "usrjquota=%s"},
{Opt_offgrpjquota, "grpjquota="},
@@ -1011,6 +1017,12 @@ static int parse_options (char *options, struct super_block *sb,
sbi->s_mount_opt |= data_opt;
}
break;
+ case Opt_data_err_abort:
+ set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ break;
+ case Opt_data_err_ignore:
+ clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ break;
#ifdef CONFIG_QUOTA
case Opt_usrjquota:
qtype = USRQUOTA;
@@ -1986,6 +1998,10 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_flags |= JFS_BARRIER;
else
journal->j_flags &= ~JFS_BARRIER;
+ if (test_opt(sb, DATA_ERR_ABORT))
+ journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
+ else
+ journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
spin_unlock(&journal->j_state_lock);
}
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index fec8f61227f..0022eec63cd 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,6 +199,9 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
goto done;
}
+ if (inode->i_ino == HFSPLUS_EXT_CNID)
+ return -EIO;
+
mutex_lock(&HFSPLUS_I(inode).extents_lock);
res = hfsplus_ext_read_extent(inode, ablock);
if (!res) {
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index b085d64a2b6..963be644297 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -254,6 +254,8 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
{
if (HFSPLUS_IS_RSRC(inode))
inode = HFSPLUS_I(inode).rsrc_inode;
+ if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+ return -EOVERFLOW;
atomic_inc(&HFSPLUS_I(inode).opencnt);
return 0;
}
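The O_LARGEFILE check added above brings hfsplus in line with other filesystems: a process built without large-file support may not open a file bigger than MAX_NON_LFS (2 GiB - 1 with a 32-bit off_t). A small userspace probe of that behaviour, assuming only standard open(2) semantics:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	if (argc < 2)
		return 1;
	int fd = open(argv[1], O_RDONLY);	/* note: no O_LARGEFILE */
	if (fd < 0 && errno == EOVERFLOW)
		printf("%s: too large without O_LARGEFILE "
		       "(or -D_FILE_OFFSET_BITS=64)\n", argv[1]);
	return 0;
}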
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index ae08c057e75..25719d902c5 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -482,6 +482,8 @@ void journal_commit_transaction(journal_t *journal)
printk(KERN_WARNING
"JBD: Detected IO errors while flushing file data "
"on %s\n", bdevname(journal->j_fs_dev, b));
+ if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
+ journal_abort(journal, err);
err = 0;
}
@@ -518,9 +520,10 @@ void journal_commit_transaction(journal_t *journal)
jh = commit_transaction->t_buffers;
/* If we're in abort mode, we just un-journal the buffer and
- release it for background writing. */
+ release it. */
if (is_journal_aborted(journal)) {
+ clear_buffer_jbddirty(jh2bh(jh));
JBUFFER_TRACE(jh, "journal is aborting: refile");
journal_refile_buffer(journal, jh);
/* If that was the last one, we need to clean up
@@ -762,6 +765,9 @@ wait_for_iobuf:
/* AKPM: bforget here */
}
+ if (err)
+ journal_abort(journal, err);
+
jbd_debug(3, "JBD: commit phase 6\n");
if (journal_write_commit_record(journal, commit_transaction))
@@ -852,6 +858,8 @@ restart_loop:
if (buffer_jbddirty(bh)) {
JBUFFER_TRACE(jh, "add to new checkpointing trans");
__journal_insert_checkpoint(jh, commit_transaction);
+ if (is_journal_aborted(journal))
+ clear_buffer_jbddirty(bh);
JBUFFER_TRACE(jh, "refile for checkpoint writeback");
__journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 0540ca27a44..d15cd6e7251 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -954,9 +954,10 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
journal_t *journal = handle->h_transaction->t_journal;
int need_brelse = 0;
struct journal_head *jh;
+ int ret = 0;
if (is_handle_aborted(handle))
- return 0;
+ return ret;
jh = journal_add_journal_head(bh);
JBUFFER_TRACE(jh, "entry");
@@ -1067,7 +1068,16 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
time if it is redirtied */
}
- /* journal_clean_data_list() may have got there first */
+ /*
+ * We cannot remove a buffer with an IO error from the
+ * committing transaction, because otherwise it would
+ * miss the error and the commit would not abort.
+ */
+ if (unlikely(!buffer_uptodate(bh))) {
+ ret = -EIO;
+ goto no_journal;
+ }
+
if (jh->b_transaction != NULL) {
JBUFFER_TRACE(jh, "unfile from commit");
__journal_temp_unlink_buffer(jh);
@@ -1108,7 +1118,7 @@ no_journal:
}
JBUFFER_TRACE(jh, "exit");
journal_put_journal_head(jh);
- return 0;
+ return ret;
}
/**
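Taken together, the ext3 and JBD hunks above plumb one mount option through to commit time: data_err=abort sets EXT3_MOUNT_DATA_ERR_ABORT, ext3_init_journal_params() translates that into JFS_ABORT_ON_SYNCDATA_ERR, and the commit path then turns an ordered-mode file data write error into journal_abort() rather than just a printk. A compressed sketch of that flag plumbing, with illustrative stand-in types rather than the kernel's:

#include <stdio.h>

#define OPT_DATA_ERR_ABORT	0x1	/* ~ EXT3_MOUNT_DATA_ERR_ABORT */
#define J_ABORT_ON_SYNCDATA	0x1	/* ~ JFS_ABORT_ON_SYNCDATA_ERR */

struct journal { unsigned flags; int aborted; };

static void init_journal_params(unsigned mount_opt, struct journal *j)
{
	if (mount_opt & OPT_DATA_ERR_ABORT)
		j->flags |= J_ABORT_ON_SYNCDATA;
	else
		j->flags &= ~J_ABORT_ON_SYNCDATA;
}

static void commit_transaction(struct journal *j, int data_io_err)
{
	if (data_io_err) {
		fprintf(stderr, "IO error flushing file data\n");
		if (j->flags & J_ABORT_ON_SYNCDATA)
			j->aborted = 1;		/* journal_abort() above */
	}
}

int main(void)
{
	struct journal j = { 0, 0 };

	init_journal_params(OPT_DATA_ERR_ABORT, &j);	/* data_err=abort */
	commit_transaction(&j, 1);
	printf("journal aborted: %d\n", j.aborted);	/* prints 1 */
	return 0;
}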
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
new file mode 100644
index 00000000000..6ae169cd8fa
--- /dev/null
+++ b/fs/jffs2/Kconfig
@@ -0,0 +1,188 @@
+config JFFS2_FS
+ tristate "Journalling Flash File System v2 (JFFS2) support"
+ select CRC32
+ depends on MTD
+ help
+ JFFS2 is the second generation of the Journalling Flash File System
+ for use on diskless embedded devices. It provides improved wear
+ levelling, compression and support for hard links. You cannot use
+ this on normal block devices, only on 'MTD' devices.
+
+ Further information on the design and implementation of JFFS2 is
+ available at <http://sources.redhat.com/jffs2/>.
+
+config JFFS2_FS_DEBUG
+ int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
+ depends on JFFS2_FS
+ default "0"
+ help
+ This controls the amount of debugging messages produced by the JFFS2
+ code. Set it to zero for use in production systems. For evaluation,
+ testing and debugging, it's advisable to set it to one. This will
+ enable a few assertions and will print debugging messages at the
+ KERN_DEBUG loglevel, where they won't normally be visible. Level 2
+ is unlikely to be useful - it enables extra debugging in certain
+ areas which at one point needed debugging, but when the bugs were
+ located and fixed, the detailed messages were relegated to level 2.
+
+ If reporting bugs, please try to have available a full dump of the
+ messages at debug level 1 while the misbehaviour was occurring.
+
+config JFFS2_FS_WRITEBUFFER
+ bool "JFFS2 write-buffering support"
+ depends on JFFS2_FS
+ default y
+ help
+ This enables the write-buffering support in JFFS2.
+
+ This functionality is required to support JFFS2 on the following
+ types of flash devices:
+ - NAND flash
+ - NOR flash with transparent ECC
+ - DataFlash
+
+config JFFS2_FS_WBUF_VERIFY
+ bool "Verify JFFS2 write-buffer reads"
+ depends on JFFS2_FS_WRITEBUFFER
+ default n
+ help
+ This causes JFFS2 to read back every page written through the
+ write-buffer, and check for errors.
+
+config JFFS2_SUMMARY
+ bool "JFFS2 summary support (EXPERIMENTAL)"
+ depends on JFFS2_FS && EXPERIMENTAL
+ default n
+ help
+ This feature makes it possible to use summary information
+ for faster filesystem mount.
+
+ The summary information can be inserted into a filesystem image
+ by the utility 'sumtool'.
+
+ If unsure, say 'N'.
+
+config JFFS2_FS_XATTR
+ bool "JFFS2 XATTR support (EXPERIMENTAL)"
+ depends on JFFS2_FS && EXPERIMENTAL
+ default n
+ help
+ Extended attributes are name:value pairs associated with inodes by
+ the kernel or by users (see the attr(5) manual page, or visit
+ <http://acl.bestbits.at/> for details).
+
+ If unsure, say N.
+
+config JFFS2_FS_POSIX_ACL
+ bool "JFFS2 POSIX Access Control Lists"
+ depends on JFFS2_FS_XATTR
+ default y
+ select FS_POSIX_ACL
+ help
+ Posix Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the Posix ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N.
+
+config JFFS2_FS_SECURITY
+ bool "JFFS2 Security Labels"
+ depends on JFFS2_FS_XATTR
+ default y
+ help
+ Security labels support alternative access control models
+ implemented by security modules like SELinux. This option
+ enables an extended attribute handler for file security
+ labels in the jffs2 filesystem.
+
+ If you are not using a security module that requires using
+ extended attributes for file security labels, say N.
+
+config JFFS2_COMPRESSION_OPTIONS
+ bool "Advanced compression options for JFFS2"
+ depends on JFFS2_FS
+ default n
+ help
+ Enabling this option allows you to explicitly choose which
+ compression modules, if any, are enabled in JFFS2. Removing
+ compressors can mean you cannot read existing file systems,
+ and enabling experimental compressors can mean that you
+ write a file system which cannot be read by a standard kernel.
+
+ If unsure, you should _definitely_ say 'N'.
+
+config JFFS2_ZLIB
+ bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
+ select ZLIB_INFLATE
+ select ZLIB_DEFLATE
+ depends on JFFS2_FS
+ default y
+ help
+ Zlib is designed to be a free, general-purpose, legally unencumbered,
+ lossless data-compression library for use on virtually any computer
+ hardware and operating system. See <http://www.gzip.org/zlib/> for
+ further information.
+
+ Say 'Y' if unsure.
+
+config JFFS2_LZO
+ bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
+ select LZO_COMPRESS
+ select LZO_DECOMPRESS
+ depends on JFFS2_FS
+ default n
+ help
+ minilzo-based compression. Generally works better than Zlib.
+
+ This feature was added in July, 2007. Say 'N' if you need
+ compatibility with older bootloaders or kernels.
+
+config JFFS2_RTIME
+ bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
+ depends on JFFS2_FS
+ default y
+ help
+ Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
+
+config JFFS2_RUBIN
+ bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
+ depends on JFFS2_FS
+ default n
+ help
+ RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
+
+choice
+ prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
+ default JFFS2_CMODE_PRIORITY
+ depends on JFFS2_FS
+ help
+ You can set the default compression mode of JFFS2 here, chosen
+ from the available compression modes. Don't touch if unsure.
+
+config JFFS2_CMODE_NONE
+ bool "no compression"
+ help
+ Uses no compression.
+
+config JFFS2_CMODE_PRIORITY
+ bool "priority"
+ help
+ Tries the compressors in a predefined order and chooses the first
+ successful one.
+
+config JFFS2_CMODE_SIZE
+ bool "size (EXPERIMENTAL)"
+ help
+ Tries all compressors and chooses the one which has the smallest
+ result.
+
+config JFFS2_CMODE_FAVOURLZO
+ bool "Favour LZO"
+ help
+ Tries all compressors and chooses the one which has the smallest
+ result but gives some preference to LZO (which has faster
+ decompression) at the expense of size.
+
+endchoice
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 86739ee53b3..f25e70c1b51 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -53,8 +53,8 @@ static int jffs2_is_best_compression(struct jffs2_compressor *this,
}
/* jffs2_compress:
- * @data: Pointer to uncompressed data
- * @cdata: Pointer to returned pointer to buffer for compressed data
+ * @data_in: Pointer to uncompressed data
+ * @cpage_out: Pointer to returned pointer to buffer for compressed data
* @datalen: On entry, holds the amount of data available for compression.
* On exit, expected to hold the amount of data actually compressed.
* @cdatalen: On entry, holds the amount of space available for compressed
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index cd219ef5525..b1aaae823a5 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -311,7 +311,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
/* FIXME: If you care. We'd need to use frags for the target
if it grows much more than this */
if (targetlen > 254)
- return -EINVAL;
+ return -ENAMETOOLONG;
ri = jffs2_alloc_raw_inode();
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index dddb2a6c9e2..259461b910a 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -68,7 +68,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
instr->len = c->sector_size;
instr->callback = jffs2_erase_callback;
instr->priv = (unsigned long)(&instr[1]);
- instr->fail_addr = 0xffffffff;
+ instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
((struct erase_priv_struct *)instr->priv)->jeb = jeb;
((struct erase_priv_struct *)instr->priv)->c = c;
@@ -175,7 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
{
/* For NAND, if the failure did not occur at the device level for a
specific physical page, don't bother updating the bad block table. */
- if (jffs2_cleanmarker_oob(c) && (bad_offset != 0xffffffff)) {
+ if (jffs2_cleanmarker_oob(c) && (bad_offset != MTD_FAIL_ADDR_UNKNOWN)) {
/* We had a device-level failure to erase. Let's see if we've
failed too many times. */
if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 086c4383022..249305d65d5 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -207,6 +207,8 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = 0;
buf->f_ffree = 0;
buf->f_namelen = JFFS2_MAX_NAME_LEN;
+ buf->f_fsid.val[0] = JFFS2_SUPER_MAGIC;
+ buf->f_fsid.val[1] = c->mtd->index;
spin_lock(&c->erase_completion_lock);
avail = c->dirty_size + c->free_size;
@@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
memset(ri, 0, sizeof(*ri));
/* Set OS-specific defaults for new inodes */
- ri->uid = cpu_to_je16(current->fsuid);
+ ri->uid = cpu_to_je16(current_fsuid());
if (dir_i->i_mode & S_ISGID) {
ri->gid = cpu_to_je16(dir_i->i_gid);
if (S_ISDIR(mode))
mode |= S_ISGID;
} else {
- ri->gid = cpu_to_je16(current->fsgid);
+ ri->gid = cpu_to_je16(current_fsgid());
}
/* POSIX ACLs have to be processed now, at least partly.
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index a9bf9603c1b..0875b60b4bf 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -261,6 +261,10 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
jffs2_sum_reset_collected(c->summary); /* reset collected summary */
+ /* adjust write buffer offset, else we get a non-contiguous write bug */
+ if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len)
+ c->wbuf_ofs = 0xffffffff;
+
D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset));
return 0;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 0e78b00035e..d9a721e6db7 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -679,10 +679,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
memset(c->wbuf,0xff,c->wbuf_pagesize);
/* adjust write buffer offset, else we get a non-contiguous write bug */
- if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize))
- c->wbuf_ofs += c->wbuf_pagesize;
- else
- c->wbuf_ofs = 0xffffffff;
+ c->wbuf_ofs += c->wbuf_pagesize;
c->wbuf_len = 0;
return 0;
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2ab70d46ecb..efdba2e802d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1517,7 +1517,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
GFP_KERNEL)) {
pagevec_add(&lru_pvec, page);
- pagevec_lru_add(&lru_pvec);
+ pagevec_lru_add_file(&lru_pvec);
SetPageUptodate(page);
unlock_page(page);
} else
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index d020866d423..3140a4429af 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
pages[nr] = *cached_page;
page_cache_get(*cached_page);
if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
- __pagevec_lru_add(lru_pvec);
+ __pagevec_lru_add_file(lru_pvec);
*cached_page = NULL;
}
index++;
@@ -2084,7 +2084,7 @@ err_out:
OSYNC_METADATA|OSYNC_DATA);
}
}
- pagevec_lru_add(&lru_pvec);
+ pagevec_lru_add_file(&lru_pvec);
ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
written ? "written" : "status", (unsigned long)written,
(long)status);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 59ea42e1ef0..61b25f4eabe 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -136,6 +136,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
unsigned long allowed;
struct vmalloc_info vmi;
long cached;
+ unsigned long pages[NR_LRU_LISTS];
+ int lru;
/*
* display in kilobytes.
@@ -154,51 +156,70 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
get_vmalloc_info(&vmi);
+ for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
+ pages[lru] = global_page_state(NR_LRU_BASE + lru);
+
/*
* Tagged format, for easy grepping and expansion.
*/
len = sprintf(page,
- "MemTotal: %8lu kB\n"
- "MemFree: %8lu kB\n"
- "Buffers: %8lu kB\n"
- "Cached: %8lu kB\n"
- "SwapCached: %8lu kB\n"
- "Active: %8lu kB\n"
- "Inactive: %8lu kB\n"
+ "MemTotal: %8lu kB\n"
+ "MemFree: %8lu kB\n"
+ "Buffers: %8lu kB\n"
+ "Cached: %8lu kB\n"
+ "SwapCached: %8lu kB\n"
+ "Active: %8lu kB\n"
+ "Inactive: %8lu kB\n"
+ "Active(anon): %8lu kB\n"
+ "Inactive(anon): %8lu kB\n"
+ "Active(file): %8lu kB\n"
+ "Inactive(file): %8lu kB\n"
+#ifdef CONFIG_UNEVICTABLE_LRU
+ "Unevictable: %8lu kB\n"
+ "Mlocked: %8lu kB\n"
+#endif
#ifdef CONFIG_HIGHMEM
- "HighTotal: %8lu kB\n"
- "HighFree: %8lu kB\n"
- "LowTotal: %8lu kB\n"
- "LowFree: %8lu kB\n"
+ "HighTotal: %8lu kB\n"
+ "HighFree: %8lu kB\n"
+ "LowTotal: %8lu kB\n"
+ "LowFree: %8lu kB\n"
#endif
- "SwapTotal: %8lu kB\n"
- "SwapFree: %8lu kB\n"
- "Dirty: %8lu kB\n"
- "Writeback: %8lu kB\n"
- "AnonPages: %8lu kB\n"
- "Mapped: %8lu kB\n"
- "Slab: %8lu kB\n"
- "SReclaimable: %8lu kB\n"
- "SUnreclaim: %8lu kB\n"
- "PageTables: %8lu kB\n"
+ "SwapTotal: %8lu kB\n"
+ "SwapFree: %8lu kB\n"
+ "Dirty: %8lu kB\n"
+ "Writeback: %8lu kB\n"
+ "AnonPages: %8lu kB\n"
+ "Mapped: %8lu kB\n"
+ "Slab: %8lu kB\n"
+ "SReclaimable: %8lu kB\n"
+ "SUnreclaim: %8lu kB\n"
+ "PageTables: %8lu kB\n"
#ifdef CONFIG_QUICKLIST
- "Quicklists: %8lu kB\n"
+ "Quicklists: %8lu kB\n"
#endif
- "NFS_Unstable: %8lu kB\n"
- "Bounce: %8lu kB\n"
- "WritebackTmp: %8lu kB\n"
- "CommitLimit: %8lu kB\n"
- "Committed_AS: %8lu kB\n"
- "VmallocTotal: %8lu kB\n"
- "VmallocUsed: %8lu kB\n"
- "VmallocChunk: %8lu kB\n",
+ "NFS_Unstable: %8lu kB\n"
+ "Bounce: %8lu kB\n"
+ "WritebackTmp: %8lu kB\n"
+ "CommitLimit: %8lu kB\n"
+ "Committed_AS: %8lu kB\n"
+ "VmallocTotal: %8lu kB\n"
+ "VmallocUsed: %8lu kB\n"
+ "VmallocChunk: %8lu kB\n",
K(i.totalram),
K(i.freeram),
K(i.bufferram),
K(cached),
K(total_swapcache_pages),
- K(global_page_state(NR_ACTIVE)),
- K(global_page_state(NR_INACTIVE)),
+ K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
+ K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
+ K(pages[LRU_ACTIVE_ANON]),
+ K(pages[LRU_INACTIVE_ANON]),
+ K(pages[LRU_ACTIVE_FILE]),
+ K(pages[LRU_INACTIVE_FILE]),
+#ifdef CONFIG_UNEVICTABLE_LRU
+ K(pages[LRU_UNEVICTABLE]),
+ K(global_page_state(NR_MLOCK)),
+#endif
#ifdef CONFIG_HIGHMEM
K(i.totalhigh),
K(i.freehigh),
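The new Active/Inactive lines in /proc/meminfo are pure arithmetic over the split LRU counters gathered into pages[] above: Active is the sum of the two active lists, Inactive the sum of the two inactive ones. Restated standalone, with invented names:

enum { INACTIVE_ANON, ACTIVE_ANON, INACTIVE_FILE, ACTIVE_FILE, NR_LISTS };

static unsigned long lru_active(const unsigned long pages[NR_LISTS])
{
	return pages[ACTIVE_ANON] + pages[ACTIVE_FILE];
}

static unsigned long lru_inactive(const unsigned long pages[NR_LISTS])
{
	return pages[INACTIVE_ANON] + pages[INACTIVE_FILE];
}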
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 841368b87a2..cd9ca67f841 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -32,9 +32,6 @@ static size_t elfcorebuf_sz;
/* Total size of vmcore file. */
static u64 vmcore_size;
-/* Stores the physical address of elf header of crash image. */
-unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-
struct proc_dir_entry *proc_vmcore = NULL;
/* Reads a page from the oldmem device from given offset. */
@@ -647,7 +644,7 @@ static int __init vmcore_init(void)
int rc = 0;
/* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
- if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX))
+ if (!(is_vmcore_usable()))
return rc;
rc = parse_crash_elf_headers();
if (rc) {
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5145cb9125a..76acdbc3461 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
goto add_error;
if (!pagevec_add(&lru_pvec, page))
- __pagevec_lru_add(&lru_pvec);
+ __pagevec_lru_add_file(&lru_pvec);
unlock_page(page);
}
- pagevec_lru_add(&lru_pvec);
+ pagevec_lru_add_file(&lru_pvec);
return 0;
fsize_exceeded:
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b13123424e4..f031d1c925f 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
inode->i_mapping->a_ops = &ramfs_aops;
inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+ mapping_set_unevictable(inode->i_mapping);
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
switch (mode & S_IFMT) {
default:
diff --git a/fs/seq_file.c b/fs/seq_file.c
index bd20f7f5a93..eba2eabcd2b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -452,17 +452,34 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
{
- size_t len = bitmap_scnprintf_len(nr_bits);
+ if (m->count < m->size) {
+ int len = bitmap_scnprintf(m->buf + m->count,
+ m->size - m->count, bits, nr_bits);
+ if (m->count + len < m->size) {
+ m->count += len;
+ return 0;
+ }
+ }
+ m->count = m->size;
+ return -1;
+}
+EXPORT_SYMBOL(seq_bitmap);
- if (m->count + len < m->size) {
- bitmap_scnprintf(m->buf + m->count, m->size - m->count,
- bits, nr_bits);
- m->count += len;
- return 0;
+int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
+ unsigned int nr_bits)
+{
+ if (m->count < m->size) {
+ int len = bitmap_scnlistprintf(m->buf + m->count,
+ m->size - m->count, bits, nr_bits);
+ if (m->count + len < m->size) {
+ m->count += len;
+ return 0;
+ }
}
m->count = m->size;
return -1;
}
+EXPORT_SYMBOL(seq_bitmap_list);
static void *single_start(struct seq_file *p, loff_t *pos)
{
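The rewritten seq_bitmap()/seq_bitmap_list() above follow the usual seq_file overflow protocol: print directly into buf[count..size), and on shortage saturate m->count to m->size and return -1, which tells the seq_file core to allocate a larger buffer and retry. The protocol in miniature (standalone sketch, invented names):

#include <string.h>

struct sbuf { char *buf; size_t size; size_t count; };

static int sbuf_puts(struct sbuf *m, const char *s)
{
	size_t len = strlen(s);

	if (m->count + len < m->size) {		/* strictly less, as above */
		memcpy(m->buf + m->count, s, len);
		m->count += len;
		return 0;
	}
	m->count = m->size;			/* mark overflow for retry */
	return -1;
}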
diff --git a/include/asm-cris/thread_info.h b/include/asm-cris/thread_info.h
index 7efe1000f99..cee97f14af3 100644
--- a/include/asm-cris/thread_info.h
+++ b/include/asm-cris/thread_info.h
@@ -88,6 +88,7 @@ struct thread_info {
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_MEMDIE 17
+#define TIF_FREEZE 18 /* is freezing for suspend */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
@@ -95,6 +96,7 @@ struct thread_info {
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE (1<<TIF_FREEZE)
#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h
index 71ef3f0b968..89061c1a67d 100644
--- a/include/asm-generic/rtc.h
+++ b/include/asm-generic/rtc.h
@@ -84,12 +84,12 @@ static inline unsigned int get_rtc_time(struct rtc_time *time)
if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
{
- BCD_TO_BIN(time->tm_sec);
- BCD_TO_BIN(time->tm_min);
- BCD_TO_BIN(time->tm_hour);
- BCD_TO_BIN(time->tm_mday);
- BCD_TO_BIN(time->tm_mon);
- BCD_TO_BIN(time->tm_year);
+ time->tm_sec = bcd2bin(time->tm_sec);
+ time->tm_min = bcd2bin(time->tm_min);
+ time->tm_hour = bcd2bin(time->tm_hour);
+ time->tm_mday = bcd2bin(time->tm_mday);
+ time->tm_mon = bcd2bin(time->tm_mon);
+ time->tm_year = bcd2bin(time->tm_year);
}
#ifdef CONFIG_MACH_DECSTATION
@@ -159,12 +159,12 @@ static inline int set_rtc_time(struct rtc_time *time)
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
|| RTC_ALWAYS_BCD) {
- BIN_TO_BCD(sec);
- BIN_TO_BCD(min);
- BIN_TO_BCD(hrs);
- BIN_TO_BCD(day);
- BIN_TO_BCD(mon);
- BIN_TO_BCD(yrs);
+ sec = bin2bcd(sec);
+ min = bin2bcd(min);
+ hrs = bin2bcd(hrs);
+ day = bin2bcd(day);
+ mon = bin2bcd(mon);
+ yrs = bin2bcd(yrs);
}
save_control = CMOS_READ(RTC_CONTROL);
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h
index abc002798a2..af0fda46e94 100644
--- a/include/asm-m68k/thread_info.h
+++ b/include/asm-m68k/thread_info.h
@@ -52,5 +52,6 @@ struct thread_info {
#define TIF_DELAYED_TRACE 14 /* single step a syscall */
#define TIF_SYSCALL_TRACE 15 /* syscall trace active */
#define TIF_MEMDIE 16
+#define TIF_FREEZE 17 /* thread is freezing for suspend */
#endif /* _ASM_M68K_THREAD_INFO_H */
diff --git a/include/asm-parisc/thread_info.h b/include/asm-parisc/thread_info.h
index 9f812741c35..0407959da48 100644
--- a/include/asm-parisc/thread_info.h
+++ b/include/asm-parisc/thread_info.h
@@ -58,6 +58,7 @@ struct thread_info {
#define TIF_32BIT 4 /* 32 bit binary */
#define TIF_MEMDIE 5
#define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */
+#define TIF_FREEZE 7 /* is freezing for suspend */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
@@ -65,6 +66,7 @@ struct thread_info {
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE (1 << TIF_FREEZE)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | \
_TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h
index e07e72846c7..62274ab9471 100644
--- a/include/asm-um/thread_info.h
+++ b/include/asm-um/thread_info.h
@@ -69,6 +69,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_MEMDIE 5
#define TIF_SYSCALL_AUDIT 6
#define TIF_RESTORE_SIGMASK 7
+#define TIF_FREEZE 16 /* is freezing for suspend */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
@@ -77,5 +78,6 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE (1 << TIF_FREEZE)
#endif
diff --git a/include/asm-xtensa/thread_info.h b/include/asm-xtensa/thread_info.h
index 7e4131dd546..0f4fe1faf9b 100644
--- a/include/asm-xtensa/thread_info.h
+++ b/include/asm-xtensa/thread_info.h
@@ -134,6 +134,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_MEMDIE 5
#define TIF_RESTORE_SIGMASK 6 /* restore signal mask in do_signal() */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_FREEZE 17 /* is freezing for suspend */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
@@ -142,6 +143,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_IRET (1<<TIF_IRET)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE (1<<TIF_FREEZE)
#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index bf9aca548f1..e531783e5d7 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -183,6 +183,7 @@ unifdef-y += auto_fs.h
unifdef-y += auxvec.h
unifdef-y += binfmts.h
unifdef-y += blktrace_api.h
+unifdef-y += byteorder.h
unifdef-y += capability.h
unifdef-y += capi.h
unifdef-y += cciss_ioctl.h
@@ -340,6 +341,7 @@ unifdef-y += soundcard.h
unifdef-y += stat.h
unifdef-y += stddef.h
unifdef-y += string.h
+unifdef-y += swab.h
unifdef-y += synclink.h
unifdef-y += sysctl.h
unifdef-y += tcp.h
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0a24d5550eb..bee52abb8a4 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -175,6 +175,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
* BDI_CAP_READ_MAP: Can be mapped for reading
* BDI_CAP_WRITE_MAP: Can be mapped for writing
* BDI_CAP_EXEC_MAP: Can be mapped for execution
+ *
+ * BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed.
*/
#define BDI_CAP_NO_ACCT_DIRTY 0x00000001
#define BDI_CAP_NO_WRITEBACK 0x00000002
@@ -184,6 +186,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
#define BDI_CAP_WRITE_MAP 0x00000020
#define BDI_CAP_EXEC_MAP 0x00000040
#define BDI_CAP_NO_ACCT_WB 0x00000080
+#define BDI_CAP_SWAP_BACKED 0x00000100
#define BDI_CAP_VMFLAGS \
(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -248,6 +251,11 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
BDI_CAP_NO_WRITEBACK));
}
+static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
+{
+ return bdi->capabilities & BDI_CAP_SWAP_BACKED;
+}
+
static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
return bdi_cap_writeback_dirty(mapping->backing_dev_info);
@@ -258,4 +266,9 @@ static inline bool mapping_cap_account_dirty(struct address_space *mapping)
return bdi_cap_account_dirty(mapping->backing_dev_info);
}
+static inline bool mapping_cap_swap_backed(struct address_space *mapping)
+{
+ return bdi_cap_swap_backed(mapping->backing_dev_info);
+}
+
#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/bcd.h b/include/linux/bcd.h
index 7ac518e3c15..22ea563ba3e 100644
--- a/include/linux/bcd.h
+++ b/include/linux/bcd.h
@@ -1,12 +1,3 @@
-/* Permission is hereby granted to copy, modify and redistribute this code
- * in terms of the GNU Library General Public License, Version 2 or later,
- * at your option.
- */
-
-/* macros to translate to/from binary and binary-coded decimal (frequently
- * found in RTC chips).
- */
-
#ifndef _BCD_H
#define _BCD_H
@@ -15,11 +6,4 @@
unsigned bcd2bin(unsigned char val) __attribute_const__;
unsigned char bin2bcd(unsigned val) __attribute_const__;
-#define BCD2BIN(val) bcd2bin(val)
-#define BIN2BCD(val) bin2bcd(val)
-
-/* backwards compat */
-#define BCD_TO_BIN(val) ((val)=BCD2BIN(val))
-#define BIN_TO_BCD(val) ((val)=BIN2BCD(val))
-
#endif /* _BCD_H */
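The RTC conversion above replaces the old BCD_TO_BIN()/BIN_TO_BCD() macros, which mutated their argument in place, with the pure functions now declared in the slimmed-down bcd.h. The conversion itself is the usual nibble arithmetic; a sketch of the conventional implementation:

unsigned bcd2bin(unsigned char val)
{
	return (val & 0x0f) + (val >> 4) * 10;	/* e.g. 0x59 -> 59 */
}

unsigned char bin2bcd(unsigned val)
{
	return ((val / 10) << 4) + val % 10;	/* e.g. 59 -> 0x59 */
}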
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 89781fd4885..1abfe664c44 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -110,7 +110,6 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits);
extern int bitmap_scnprintf(char *buf, unsigned int len,
const unsigned long *src, int nbits);
-extern int bitmap_scnprintf_len(unsigned int nr_bits);
extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
unsigned long *dst, int nbits);
extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index eadaab44015..3ce64b90118 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -322,7 +322,7 @@ static inline void wait_on_buffer(struct buffer_head *bh)
static inline int trylock_buffer(struct buffer_head *bh)
{
- return likely(!test_and_set_bit(BH_Lock, &bh->b_state));
+ return likely(!test_and_set_bit_lock(BH_Lock, &bh->b_state));
}
static inline void lock_buffer(struct buffer_head *bh)
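The switch to test_and_set_bit_lock() matters because taking a lock needs acquire ordering: loads inside the critical section must not be reordered before the lock bit is observed set, and the _lock variant guarantees that (and can be cheaper than a full barrier on some architectures). A rough C11 analogue of the pattern:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_flag bh_lock = ATOMIC_FLAG_INIT;

static bool trylock(void)
{
	/* true on success, with memory_order_acquire for the winner */
	return !atomic_flag_test_and_set_explicit(&bh_lock,
						  memory_order_acquire);
}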
diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild
index 1133d5f9d81..fbaa7f9cee3 100644
--- a/include/linux/byteorder/Kbuild
+++ b/include/linux/byteorder/Kbuild
@@ -1,3 +1,4 @@
unifdef-y += big_endian.h
unifdef-y += little_endian.h
unifdef-y += swab.h
+unifdef-y += swabb.h
diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h
index 44f95b92393..1cba3f3efe5 100644
--- a/include/linux/byteorder/big_endian.h
+++ b/include/linux/byteorder/big_endian.h
@@ -10,6 +10,7 @@
#include <linux/types.h>
#include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
#define __constant_htonl(x) ((__force __be32)(__u32)(x))
#define __constant_ntohl(x) ((__force __u32)(__be32)(x))
diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h
index 4cc170a3176..cedc1b5a289 100644
--- a/include/linux/byteorder/little_endian.h
+++ b/include/linux/byteorder/little_endian.h
@@ -10,6 +10,7 @@
#include <linux/types.h>
#include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
#define __constant_htonl(x) ((__force __be32)___constant_swab32((x)))
#define __constant_ntohl(x) ___constant_swab32((__force __be32)(x))
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 30934e4bfaa..8b00f6643e9 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -9,12 +9,12 @@
*/
#include <linux/sched.h>
-#include <linux/kref.h>
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/rcupdate.h>
#include <linux/cgroupstats.h>
#include <linux/prio_heap.h>
+#include <linux/rwsem.h>
#ifdef CONFIG_CGROUPS
@@ -137,6 +137,15 @@ struct cgroup {
* release_list_lock
*/
struct list_head release_list;
+
+ /* pids_mutex protects the fields below */
+ struct rw_semaphore pids_mutex;
+ /* Array of process ids in the cgroup */
+ pid_t *tasks_pids;
+ /* How many files are using the current tasks_pids array */
+ int pids_use_count;
+ /* Length of the current tasks_pids array */
+ int pids_length;
};
/* A css_set is a structure holding pointers to a set of
@@ -149,7 +158,7 @@ struct cgroup {
struct css_set {
/* Reference count */
- struct kref ref;
+ atomic_t refcount;
/*
* List running through all cgroup groups in the same hash
@@ -394,6 +403,9 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
int cgroup_scan_tasks(struct cgroup_scanner *scan);
int cgroup_attach_task(struct cgroup *, struct task_struct *);
+void cgroup_mm_owner_callbacks(struct task_struct *old,
+ struct task_struct *new);
+
#else /* !CONFIG_CGROUPS */
static inline int cgroup_init_early(void) { return 0; }
@@ -412,15 +424,9 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
return -EINVAL;
}
+static inline void cgroup_mm_owner_callbacks(struct task_struct *old,
+ struct task_struct *new) {}
+
#endif /* !CONFIG_CGROUPS */
-#ifdef CONFIG_MM_OWNER
-extern void
-cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new);
-#else /* !CONFIG_MM_OWNER */
-static inline void
-cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
-{
-}
-#endif /* CONFIG_MM_OWNER */
#endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index e2877454ec8..9c22396e8b5 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -48,3 +48,9 @@ SUBSYS(devices)
#endif
/* */
+
+#ifdef CONFIG_CGROUP_FREEZER
+SUBSYS(freezer)
+#endif
+
+/* */
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 025e4f57510..0acf3b737e2 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -8,12 +8,9 @@
#include <linux/proc_fs.h>
#define ELFCORE_ADDR_MAX (-1ULL)
+#define ELFCORE_ADDR_ERR (-2ULL)
-#ifdef CONFIG_PROC_VMCORE
extern unsigned long long elfcorehdr_addr;
-#else
-static const unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-#endif
extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
unsigned long, int);
@@ -28,10 +25,43 @@ extern struct proc_dir_entry *proc_vmcore;
#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
+/*
+ * is_kdump_kernel() checks whether this kernel is booting after a panic of
+ * the previous kernel. This is determined by checking whether the previous
+ * kernel has passed the ELF core header address on the command line.
+ *
+ * This is not just a test of whether CONFIG_CRASH_DUMP is enabled. It
+ * returns 1 only if CONFIG_CRASH_DUMP=y and the kernel is booting after a
+ * panic of the previous kernel.
+ */
+
static inline int is_kdump_kernel(void)
{
return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
}
+
+/* is_vmcore_usable() checks if the kernel is booting after a panic and
+ * the vmcore region is usable.
+ *
+ * This makes use of the fact that, due to alignment, -2ULL is not
+ * a valid pointer, much in the vein of IS_ERR(), except that it
+ * deals directly with an unsigned long long rather than a pointer.
+ */
+
+static inline int is_vmcore_usable(void)
+{
+ return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
+}
+
+/* vmcore_unusable() marks the vmcore as unusable,
+ * without disturbing the logic of is_kdump_kernel()
+ */
+
+static inline void vmcore_unusable(void)
+{
+ if (is_kdump_kernel())
+ elfcorehdr_addr = ELFCORE_ADDR_ERR;
+}
#else /* !CONFIG_CRASH_DUMP */
static inline int is_kdump_kernel(void) { return 0; }
#endif /* CONFIG_CRASH_DUMP */
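The sentinel scheme above packs three states into one unsigned long long, much as IS_ERR() does for pointers: -1ULL means no ELF header was passed on the command line, -2ULL means one was passed but turned out to be unusable, and anything else is the real header address. An isolated restatement (invented names):

#define ADDR_MAX	(-1ULL)		/* never set on the command line */
#define ADDR_ERR	(-2ULL)		/* set, but validation failed */

static unsigned long long hdr_addr = ADDR_MAX;

static int is_kdump(void)
{
	return hdr_addr != ADDR_MAX;
}

static int vmcore_usable(void)
{
	return is_kdump() && hdr_addr != ADDR_ERR;
}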
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 159d9b476cd..d14f0291848 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -380,6 +380,8 @@ struct ext3_inode {
#define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */
#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
+#define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write
+ * error in ordered mode */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 531ccd5f596..75a81eaf343 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -808,6 +808,7 @@ struct fb_tile_ops {
struct fb_info {
int node;
int flags;
+ struct mutex lock; /* Lock for open/release/ioctl funcs */
struct fb_var_screeninfo var; /* Current var */
struct fb_fix_screeninfo fix; /* Current fix */
struct fb_monspecs monspecs; /* Current Monitor specs */
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index deddeedf325..8f225339eee 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -6,7 +6,7 @@
#include <linux/sched.h>
#include <linux/wait.h>
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_FREEZER
/*
* Check if a process has been frozen
*/
@@ -39,28 +39,18 @@ static inline void clear_freeze_flag(struct task_struct *p)
clear_tsk_thread_flag(p, TIF_FREEZE);
}
+static inline bool should_send_signal(struct task_struct *p)
+{
+ return !(p->flags & PF_FREEZER_NOSIG);
+}
+
/*
* Wake up a frozen process
- *
- * task_lock() is taken to prevent the race with refrigerator() which may
- * occur if the freezing of tasks fails. Namely, without the lock, if the
- * freezing of tasks failed, thaw_tasks() might have run before a task in
- * refrigerator() could call frozen_process(), in which case the task would be
- * frozen and no one would thaw it.
*/
-static inline int thaw_process(struct task_struct *p)
-{
- task_lock(p);
- if (frozen(p)) {
- p->flags &= ~PF_FROZEN;
- task_unlock(p);
- wake_up_process(p);
- return 1;
- }
- clear_freeze_flag(p);
- task_unlock(p);
- return 0;
-}
+extern int __thaw_process(struct task_struct *p);
+
+/* Takes and releases task alloc lock using task_lock() */
+extern int thaw_process(struct task_struct *p);
extern void refrigerator(void);
extern int freeze_processes(void);
@@ -75,6 +65,15 @@ static inline int try_to_freeze(void)
return 0;
}
+extern bool freeze_task(struct task_struct *p, bool sig_only);
+extern void cancel_freezing(struct task_struct *p);
+
+#ifdef CONFIG_CGROUP_FREEZER
+extern int cgroup_frozen(struct task_struct *task);
+#else /* !CONFIG_CGROUP_FREEZER */
+static inline int cgroup_frozen(struct task_struct *task) { return 0; }
+#endif /* !CONFIG_CGROUP_FREEZER */
+
/*
* The PF_FREEZER_SKIP flag should be set by a vfork parent right before it
* calls wait_for_completion(&vfork) and reset right after it returns from this
@@ -166,7 +165,7 @@ static inline void set_freezable_with_signal(void)
} while (try_to_freeze()); \
__retval; \
})
-#else /* !CONFIG_PM_SLEEP */
+#else /* !CONFIG_FREEZER */
static inline int frozen(struct task_struct *p) { return 0; }
static inline int freezing(struct task_struct *p) { return 0; }
static inline void set_freeze_flag(struct task_struct *p) {}
@@ -191,6 +190,6 @@ static inline void set_freezable_with_signal(void) {}
#define wait_event_freezable_timeout(wq, condition, timeout) \
wait_event_interruptible_timeout(wq, condition, timeout)
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* !CONFIG_FREEZER */
#endif /* FREEZER_H_INCLUDED */
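With the guard now keyed on CONFIG_FREEZER, a freezable kernel thread keeps the same shape it had under CONFIG_PM_SLEEP. A typical consumer of this API, sketched from the declarations above (set_freezable() and kthread_should_stop() are assumed from the usual kthread helpers, not defined in this patch):

static int my_thread(void *unused)
{
	set_freezable();			/* opt in to the freezer */
	while (!kthread_should_stop()) {
		try_to_freeze();		/* park here during suspend */
		/* ... do one unit of work ... */
	}
	return 0;
}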
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 7ebbcb1c9ba..35d4f6342fa 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -816,6 +816,9 @@ struct journal_s
#define JFS_FLUSHED 0x008 /* The journal superblock has been flushed */
#define JFS_LOADED 0x010 /* The journal superblock has been loaded */
#define JFS_BARRIER 0x020 /* Use IDE barriers */
+#define JFS_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
+ * data write error in ordered
+ * mode */
/*
* Function declarations for the journaling transaction and buffer
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index fdf3967e139..1fbe14d3952 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -27,16 +27,13 @@ struct mm_struct;
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-#define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0)
-
-extern struct page_cgroup *page_get_page_cgroup(struct page *page);
extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
+extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
extern void mem_cgroup_uncharge_page(struct page *page);
extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern void mem_cgroup_move_lists(struct page *page, bool active);
extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -44,7 +41,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
unsigned long *scanned, int order,
int mode, struct zone *z,
struct mem_cgroup *mem_cont,
- int active);
+ int active, int file);
extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
@@ -69,21 +66,11 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
int priority);
-extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
- struct zone *zone, int priority);
-extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
- struct zone *zone, int priority);
-
-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
-static inline void page_reset_bad_cgroup(struct page *page)
-{
-}
+extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
+ int priority, enum lru_list lru);
-static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
-{
- return NULL;
-}
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
static inline int mem_cgroup_charge(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
@@ -159,14 +146,9 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
{
}
-static inline long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
- struct zone *zone, int priority)
-{
- return 0;
-}
-
-static inline long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
- struct zone *zone, int priority)
+static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem,
+ struct zone *zone, int priority,
+ enum lru_list lru)
{
return 0;
}
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 03aea612d28..3f34005068d 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,7 +7,6 @@
typedef struct page *new_page_t(struct page *, unsigned long private, int **);
#ifdef CONFIG_MIGRATION
-extern int isolate_lru_page(struct page *p, struct list_head *pagelist);
extern int putback_lru_pages(struct list_head *l);
extern int migrate_page(struct address_space *,
struct page *, struct page *);
@@ -21,8 +20,6 @@ extern int migrate_vmas(struct mm_struct *mm,
const nodemask_t *from, const nodemask_t *to,
unsigned long flags);
#else
-static inline int isolate_lru_page(struct page *p, struct list_head *list)
- { return -ENOSYS; }
static inline int putback_lru_pages(struct list_head *l) { return 0; }
static inline int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private) { return -ENOSYS; }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c61ba10768e..ffee2f74341 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -132,6 +132,11 @@ extern unsigned int kobjsize(const void *objp);
#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ)
/*
+ * special vmas that are non-mergable, non-mlock()able
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+
+/*
* mapping from the currently active vm_flags protection bits (the
* low four bits) to a page protection mask..
*/
@@ -700,10 +705,10 @@ static inline int page_mapped(struct page *page)
extern void show_free_areas(void);
#ifdef CONFIG_SHMEM
-int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
#else
static inline int shmem_lock(struct file *file, int lock,
- struct user_struct *user)
+ struct user_struct *user)
{
return 0;
}
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 895bc4e9303..c948350c378 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -1,40 +1,100 @@
-static inline void
-add_page_to_active_list(struct zone *zone, struct page *page)
-{
- list_add(&page->lru, &zone->active_list);
- __inc_zone_state(zone, NR_ACTIVE);
-}
+#ifndef LINUX_MM_INLINE_H
+#define LINUX_MM_INLINE_H
-static inline void
-add_page_to_inactive_list(struct zone *zone, struct page *page)
+/**
+ * page_is_file_cache - should the page be on a file LRU or anon LRU?
+ * @page: the page to test
+ *
+ * Returns LRU_FILE if @page is a page cache page backed by a regular
+ * filesystem, or 0 if @page is anonymous, tmpfs, or otherwise RAM- or
+ * swap-backed.
+ * Used by functions that manipulate the LRU lists, to sort a page
+ * onto the right LRU list.
+ *
+ * We would like to get this info without a page flag, but the state
+ * needs to survive until the page is last deleted from the LRU, which
+ * could be as far down as __page_cache_release.
+ */
+static inline int page_is_file_cache(struct page *page)
{
- list_add(&page->lru, &zone->inactive_list);
- __inc_zone_state(zone, NR_INACTIVE);
+ if (PageSwapBacked(page))
+ return 0;
+
+ /* The page is page cache backed by a normal filesystem. */
+ return LRU_FILE;
}
static inline void
-del_page_from_active_list(struct zone *zone, struct page *page)
+add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
{
- list_del(&page->lru);
- __dec_zone_state(zone, NR_ACTIVE);
+ list_add(&page->lru, &zone->lru[l].list);
+ __inc_zone_state(zone, NR_LRU_BASE + l);
}
static inline void
-del_page_from_inactive_list(struct zone *zone, struct page *page)
+del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
{
list_del(&page->lru);
- __dec_zone_state(zone, NR_INACTIVE);
+ __dec_zone_state(zone, NR_LRU_BASE + l);
}
static inline void
del_page_from_lru(struct zone *zone, struct page *page)
{
+ enum lru_list l = LRU_BASE;
+
list_del(&page->lru);
- if (PageActive(page)) {
- __ClearPageActive(page);
- __dec_zone_state(zone, NR_ACTIVE);
+ if (PageUnevictable(page)) {
+ __ClearPageUnevictable(page);
+ l = LRU_UNEVICTABLE;
} else {
- __dec_zone_state(zone, NR_INACTIVE);
+ if (PageActive(page)) {
+ __ClearPageActive(page);
+ l += LRU_ACTIVE;
+ }
+ l += page_is_file_cache(page);
+ }
+ __dec_zone_state(zone, NR_LRU_BASE + l);
+}
+
+/**
+ * page_lru - which LRU list should a page be on?
+ * @page: the page to test
+ *
+ * Returns the LRU list a page should be on, as an index
+ * into the array of LRU lists.
+ */
+static inline enum lru_list page_lru(struct page *page)
+{
+ enum lru_list lru = LRU_BASE;
+
+ if (PageUnevictable(page))
+ lru = LRU_UNEVICTABLE;
+ else {
+ if (PageActive(page))
+ lru += LRU_ACTIVE;
+ lru += page_is_file_cache(page);
}
+
+ return lru;
}
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ *
+ * Returns true if the zone does not have enough inactive anon pages,
+ * meaning some active anon pages need to be deactivated.
+ */
+static inline int inactive_anon_is_low(struct zone *zone)
+{
+ unsigned long active, inactive;
+
+ active = zone_page_state(zone, NR_ACTIVE_ANON);
+ inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+
+ if (inactive * zone->inactive_ratio < active)
+ return 1;
+
+ return 0;
+}
+#endif
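page_lru() and del_page_from_lru() above depend on the index arithmetic being closed: with LRU_ACTIVE = 1 and LRU_FILE = 2, adding LRU_ACTIVE or LRU_FILE always lands on the right neighbouring list. A standalone demonstration (invented names):

#include <stdio.h>

enum { ACTIVE = 1, FILE_BIT = 2 };
enum list { INACTIVE_ANON, ACTIVE_ANON, INACTIVE_FILE, ACTIVE_FILE };

static enum list classify(int active, int file)
{
	return (enum list)(active * ACTIVE + file * FILE_BIT);
}

int main(void)
{
	printf("%d\n", classify(1, 1));	/* 3 == ACTIVE_FILE */
	return 0;
}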
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9d49fa36bbe..fe825471d5a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -94,9 +94,6 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
- unsigned long page_cgroup;
-#endif
};
/*
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 428328a05fa..35a7b5e1946 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -81,21 +81,31 @@ struct zone_padding {
enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
- NR_INACTIVE,
- NR_ACTIVE,
+ NR_LRU_BASE,
+ NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+ NR_ACTIVE_ANON, /* " " " " " */
+ NR_INACTIVE_FILE, /* " " " " " */
+ NR_ACTIVE_FILE, /* " " " " " */
+#ifdef CONFIG_UNEVICTABLE_LRU
+ NR_UNEVICTABLE, /* " " " " " */
+ NR_MLOCK, /* mlock()ed pages found and moved off LRU */
+#else
+ NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */
+ NR_MLOCK = NR_ACTIVE_FILE,
+#endif
NR_ANON_PAGES, /* Mapped anonymous pages */
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
only modified from process context */
NR_FILE_PAGES,
NR_FILE_DIRTY,
NR_WRITEBACK,
- /* Second 128 byte cacheline */
NR_SLAB_RECLAIMABLE,
NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
NR_UNSTABLE_NFS, /* NFS unstable pages */
NR_BOUNCE,
NR_VMSCAN_WRITE,
+ /* Second 128 byte cacheline */
NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
#ifdef CONFIG_NUMA
NUMA_HIT, /* allocated in intended node */
@@ -107,6 +117,55 @@ enum zone_stat_item {
#endif
NR_VM_ZONE_STAT_ITEMS };
+/*
+ * We do arithmetic on the LRU lists in various places in the code,
+ * so it is important to keep the active lists LRU_ACTIVE higher in
+ * the array than the corresponding inactive lists, and to keep
+ * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
+ *
+ * This has to be kept in sync with the statistics in zone_stat_item
+ * above and the descriptions in vmstat_text in mm/vmstat.c
+ */
+#define LRU_BASE 0
+#define LRU_ACTIVE 1
+#define LRU_FILE 2
+
+enum lru_list {
+ LRU_INACTIVE_ANON = LRU_BASE,
+ LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
+ LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
+ LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
+#ifdef CONFIG_UNEVICTABLE_LRU
+ LRU_UNEVICTABLE,
+#else
+ LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid compiler errors in dead code */
+#endif
+ NR_LRU_LISTS
+};
+
+#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
+
+#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++)
+
+static inline int is_file_lru(enum lru_list l)
+{
+ return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
+}
+
+static inline int is_active_lru(enum lru_list l)
+{
+ return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
+}
+
+static inline int is_unevictable_lru(enum lru_list l)
+{
+#ifdef CONFIG_UNEVICTABLE_LRU
+ return (l == LRU_UNEVICTABLE);
+#else
+ return 0;
+#endif
+}
+
struct per_cpu_pages {
int count; /* number of pages in the list */
int high; /* high watermark, emptying needed */
@@ -251,10 +310,22 @@ struct zone {
/* Fields commonly accessed by the page reclaim scanner */
spinlock_t lru_lock;
- struct list_head active_list;
- struct list_head inactive_list;
- unsigned long nr_scan_active;
- unsigned long nr_scan_inactive;
+ struct {
+ struct list_head list;
+ unsigned long nr_scan;
+ } lru[NR_LRU_LISTS];
+
+ /*
+ * The pageout code in vmscan.c keeps track of how many of the
+ * mem/swap backed and file backed pages are referenced.
+ * The higher the rotated/scanned ratio, the more valuable
+ * that cache is.
+ *
+ * The anon LRU stats live in [0], file LRU stats in [1]
+ */
+ unsigned long recent_rotated[2];
+ unsigned long recent_scanned[2];
+
unsigned long pages_scanned; /* since last reclaim */
unsigned long flags; /* zone flags, see below */
@@ -276,6 +347,12 @@ struct zone {
*/
int prev_priority;
+ /*
+ * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+ * this zone's LRU. Maintained by the pageout code.
+ */
+ unsigned int inactive_ratio;
+
ZONE_PADDING(_pad2_)
/* Rarely used or read-mostly fields */
@@ -524,8 +601,11 @@ typedef struct pglist_data {
struct zone node_zones[MAX_NR_ZONES];
struct zonelist node_zonelists[MAX_ZONELISTS];
int nr_zones;
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
struct page *node_mem_map;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+ struct page_cgroup *node_page_cgroup;
+#endif
#endif
struct bootmem_data *bdata;
#ifdef CONFIG_MEMORY_HOTPLUG
@@ -854,6 +934,7 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
#endif
struct page;
+struct page_cgroup;
struct mem_section {
/*
* This is, logically, a pointer to an array of struct
@@ -871,6 +952,14 @@ struct mem_section {
/* See declaration of similar field in struct zone */
unsigned long *pageblock_flags;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+ /*
+ * Under SPARSEMEM, pgdat has no page_cgroup pointer; the section
+ * carries it instead (see memcontrol.h/page_cgroup.h about this).
+ */
+ struct page_cgroup *page_cgroup;
+ unsigned long pad;
+#endif
};
#ifdef CONFIG_SPARSEMEM_EXTREME
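The comment above zone_stat_item insists the two enums stay in sync because counters are addressed as NR_LRU_BASE + lru; if enum lru_list ever drifted from the NR_*_ANON/NR_*_FILE block, every LRU statistic would be misattributed. The invariant in miniature (invented names):

enum stat { NR_FREE, NR_LRU_BASE,
	    NR_INACTIVE_ANON = NR_LRU_BASE, NR_ACTIVE_ANON,
	    NR_INACTIVE_FILE, NR_ACTIVE_FILE, NR_STATS };
enum lru  { LRU_INACTIVE_ANON, LRU_ACTIVE_ANON,
	    LRU_INACTIVE_FILE, LRU_ACTIVE_FILE, NR_LRUS };

static unsigned long stats[NR_STATS];

static unsigned long lru_count(enum lru l)
{
	return stats[NR_LRU_BASE + l];	/* valid only while in sync */
}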
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index d6fb115f5a0..ee5124ec319 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -12,6 +12,7 @@
#include <linux/mtd/flashchip.h>
#include <linux/mtd/map.h>
#include <linux/mtd/cfi_endian.h>
+#include <linux/mtd/xip.h>
#ifdef CONFIG_MTD_CFI_I1
#define cfi_interleave(cfi) 1
@@ -430,7 +431,6 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t
{
map_word val;
uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type);
-
val = cfi_build_cmd(cmd, map, cfi);
if (prev_val)
@@ -483,6 +483,13 @@ static inline void cfi_udelay(int us)
}
}
+int __xipram cfi_qry_present(struct map_info *map, __u32 base,
+ struct cfi_private *cfi);
+int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map,
+ struct cfi_private *cfi);
+void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map,
+ struct cfi_private *cfi);
+
struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t size,
const char* name);
struct cfi_fixup {
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index 08dd131301c..d4f38c5fd44 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -73,6 +73,10 @@ struct flchip {
int buffer_write_time;
int erase_time;
+ int word_write_time_max;
+ int buffer_write_time_max;
+ int erase_time_max;
+
void *priv;
};
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 92263654855..eae26bb6430 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -25,8 +25,10 @@
#define MTD_ERASE_DONE 0x08
#define MTD_ERASE_FAILED 0x10
+#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff
+
/* If the erase fails, fail_addr might indicate exactly which block failed. If
- fail_addr = 0xffffffff, the failure was not at the device level or was not
+ fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not
specific to any particular block. */
struct erase_info {
struct mtd_info *mtd;
diff --git a/include/linux/mtd/nand-gpio.h b/include/linux/mtd/nand-gpio.h
new file mode 100644
index 00000000000..51534e50f7f
--- /dev/null
+++ b/include/linux/mtd/nand-gpio.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_MTD_NAND_GPIO_H
+#define __LINUX_MTD_NAND_GPIO_H
+
+#include <linux/mtd/nand.h>
+
+struct gpio_nand_platdata {
+ int gpio_nce;
+ int gpio_nwp;
+ int gpio_cle;
+ int gpio_ale;
+ int gpio_rdy;
+ void (*adjust_parts)(struct gpio_nand_platdata *, size_t);
+ struct mtd_partition *parts;
+ unsigned int num_parts;
+ unsigned int options;
+ int chip_delay;
+};
+
+#endif
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 81774e5facf..733d3f3b4eb 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -248,6 +248,7 @@ struct nand_hw_control {
* @read_page_raw: function to read a raw page without ECC
* @write_page_raw: function to write a raw page without ECC
* @read_page: function to read a page according to the ecc generator requirements
+ * @read_subpage: function to read parts of the page covered by ECC.
* @write_page: function to write a page according to the ecc generator requirements
* @read_oob: function to read chip OOB data
* @write_oob: function to write chip OOB data
diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h
index d1b310c92eb..0c6bbe28f38 100644
--- a/include/linux/mtd/onenand_regs.h
+++ b/include/linux/mtd/onenand_regs.h
@@ -152,6 +152,8 @@
#define ONENAND_SYS_CFG1_INT (1 << 6)
#define ONENAND_SYS_CFG1_IOBE (1 << 5)
#define ONENAND_SYS_CFG1_RDY_CONF (1 << 4)
+#define ONENAND_SYS_CFG1_HF (1 << 2)
+#define ONENAND_SYS_CFG1_SYNC_WRITE (1 << 1)
/*
* Controller Status Register F240h (R)
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 5014f7a9f5d..c92b4d43960 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -73,7 +73,6 @@ struct device;
struct device_node;
int __devinit of_mtd_parse_partitions(struct device *dev,
- struct mtd_info *mtd,
struct device_node *node,
struct mtd_partition **pparts);
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
new file mode 100644
index 00000000000..e77c1cea404
--- /dev/null
+++ b/include/linux/mtd/sh_flctl.h
@@ -0,0 +1,125 @@
+/*
+ * SuperH FLCTL nand controller
+ *
+ * Copyright © 2008 Renesas Solutions Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef __SH_FLCTL_H__
+#define __SH_FLCTL_H__
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+
+/* FLCTL registers */
+#define FLCMNCR(f) (f->reg + 0x0)
+#define FLCMDCR(f) (f->reg + 0x4)
+#define FLCMCDR(f) (f->reg + 0x8)
+#define FLADR(f) (f->reg + 0xC)
+#define FLADR2(f) (f->reg + 0x3C)
+#define FLDATAR(f) (f->reg + 0x10)
+#define FLDTCNTR(f) (f->reg + 0x14)
+#define FLINTDMACR(f) (f->reg + 0x18)
+#define FLBSYTMR(f) (f->reg + 0x1C)
+#define FLBSYCNT(f) (f->reg + 0x20)
+#define FLDTFIFO(f) (f->reg + 0x24)
+#define FLECFIFO(f) (f->reg + 0x28)
+#define FLTRCR(f) (f->reg + 0x2C)
+#define FL4ECCRESULT0(f) (f->reg + 0x80)
+#define FL4ECCRESULT1(f) (f->reg + 0x84)
+#define FL4ECCRESULT2(f) (f->reg + 0x88)
+#define FL4ECCRESULT3(f) (f->reg + 0x8C)
+#define FL4ECCCR(f) (f->reg + 0x90)
+#define FL4ECCCNT(f) (f->reg + 0x94)
+#define FLERRADR(f) (f->reg + 0x98)
+
+/* FLCMNCR control bits */
+#define ECCPOS2 (0x1 << 25)
+#define _4ECCCNTEN (0x1 << 24)
+#define _4ECCEN (0x1 << 23)
+#define _4ECCCORRECT (0x1 << 22)
+#define SNAND_E (0x1 << 18) /* SNAND (0=512 1=2048)*/
+#define QTSEL_E (0x1 << 17)
+#define ENDIAN (0x1 << 16) /* 1 = little endian */
+#define FCKSEL_E (0x1 << 15)
+#define ECCPOS_00 (0x00 << 12)
+#define ECCPOS_01 (0x01 << 12)
+#define ECCPOS_02 (0x02 << 12)
+#define ACM_SACCES_MODE (0x01 << 10)
+#define NANWF_E (0x1 << 9)
+#define SE_D (0x1 << 8) /* Spare area disable */
+#define CE1_ENABLE (0x1 << 4) /* Chip Enable 1 */
+#define CE0_ENABLE (0x1 << 3) /* Chip Enable 0 */
+#define TYPESEL_SET (0x1 << 0)
+
+/* FLCMDCR control bits */
+#define ADRCNT2_E (0x1 << 31) /* 5byte address enable */
+#define ADRMD_E (0x1 << 26) /* Sector address access */
+#define CDSRC_E (0x1 << 25) /* Data buffer selection */
+#define DOSR_E (0x1 << 24) /* Status read check */
+#define SELRW (0x1 << 21) /* 0:read 1:write */
+#define DOADR_E (0x1 << 20) /* Address stage execute */
+#define ADRCNT_1 (0x00 << 18) /* Address data bytes: 1byte */
+#define ADRCNT_2 (0x01 << 18) /* Address data bytes: 2byte */
+#define ADRCNT_3 (0x02 << 18) /* Address data bytes: 3byte */
+#define ADRCNT_4 (0x03 << 18) /* Address data bytes: 4byte */
+#define DOCMD2_E (0x1 << 17) /* 2nd cmd stage execute */
+#define DOCMD1_E (0x1 << 16) /* 1st cmd stage execute */
+
+/* FLTRCR control bits */
+#define TRSTRT (0x1 << 0) /* translation start */
+#define TREND (0x1 << 1) /* translation end */
+
+/* FL4ECCCR control bits */
+#define _4ECCFA (0x1 << 2) /* 4 symbols correct fault */
+#define _4ECCEND (0x1 << 1) /* 4 symbols end */
+#define _4ECCEXST (0x1 << 0) /* 4 symbols exist */
+
+#define INIT_FL4ECCRESULT_VAL 0x03FF03FF
+#define LOOP_TIMEOUT_MAX 0x00010000
+
+#define mtd_to_flctl(mtd) container_of(mtd, struct sh_flctl, mtd)
+
+struct sh_flctl {
+ struct mtd_info mtd;
+ struct nand_chip chip;
+ void __iomem *reg;
+
+ uint8_t done_buff[2048 + 64]; /* max size 2048 + 64 */
+ int read_bytes;
+ int index;
+ int seqin_column; /* column in SEQIN cmd */
+ int seqin_page_addr; /* page_addr in SEQIN cmd */
+ uint32_t seqin_read_cmd; /* read cmd in SEQIN cmd */
+ int erase1_page_addr; /* page_addr in ERASE1 cmd */
+ uint32_t erase_ADRCNT; /* bits of FLCMDCR in ERASE1 cmd */
+ uint32_t rw_ADRCNT; /* bits of FLCMDCR in READ WRITE cmd */
+
+ int hwecc_cant_correct[4];
+
+ unsigned page_size:1; /* NAND page size (0 = 512, 1 = 2048) */
+ unsigned hwecc:1; /* Hardware ECC (0 = disabled, 1 = enabled) */
+};
+
+struct sh_flctl_platform_data {
+ struct mtd_partition *parts;
+ int nr_parts;
+ unsigned long flcmncr_val;
+
+ unsigned has_hwecc:1;
+};
+
+#endif /* __SH_FLCTL_H__ */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c74d3e87531..b12f93a3c34 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -93,6 +93,11 @@ enum pageflags {
PG_mappedtodisk, /* Has blocks allocated on-disk */
PG_reclaim, /* To be reclaimed asap */
PG_buddy, /* Page is free, on buddy lists */
+ PG_swapbacked, /* Page is backed by RAM/swap */
+#ifdef CONFIG_UNEVICTABLE_LRU
+ PG_unevictable, /* Page is "unevictable" */
+ PG_mlocked, /* Page is vma mlocked */
+#endif
#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
PG_uncached, /* Page has been mapped as uncached */
#endif
@@ -161,6 +166,18 @@ static inline int Page##uname(struct page *page) \
#define TESTSCFLAG(uname, lname) \
TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
+#define SETPAGEFLAG_NOOP(uname) \
+static inline void SetPage##uname(struct page *page) { }
+
+#define CLEARPAGEFLAG_NOOP(uname) \
+static inline void ClearPage##uname(struct page *page) { }
+
+#define __CLEARPAGEFLAG_NOOP(uname) \
+static inline void __ClearPage##uname(struct page *page) { }
+
+#define TESTCLEARFLAG_FALSE(uname) \
+static inline int TestClearPage##uname(struct page *page) { return 0; }
+
struct page; /* forward declaration */
TESTPAGEFLAG(Locked, locked)
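For orientation, the accessors these macros generate look roughly like this hand expansion of PAGEFLAG(Unevictable, unevictable):

	/* Approximate hand expansion of PAGEFLAG(Unevictable, unevictable): */
	static inline int PageUnevictable(struct page *page)
		{ return test_bit(PG_unevictable, &page->flags); }
	static inline void SetPageUnevictable(struct page *page)
		{ set_bit(PG_unevictable, &page->flags); }
	static inline void ClearPageUnevictable(struct page *page)
		{ clear_bit(PG_unevictable, &page->flags); }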
@@ -169,6 +186,7 @@ PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
+ TESTCLEARFLAG(Active, active)
__PAGEFLAG(Slab, slab)
PAGEFLAG(Checked, checked) /* Used by some filesystems */
PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */
@@ -176,6 +194,7 @@ PAGEFLAG(SavePinned, savepinned); /* Xen */
PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
__SETPAGEFLAG(Private, private)
+PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
__PAGEFLAG(SlobPage, slob_page)
__PAGEFLAG(SlobFree, slob_free)
@@ -211,6 +230,25 @@ PAGEFLAG(SwapCache, swapcache)
PAGEFLAG_FALSE(SwapCache)
#endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
+ TESTCLEARFLAG(Unevictable, unevictable)
+
+#define MLOCK_PAGES 1
+PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
+ TESTSCFLAG(Mlocked, mlocked)
+
+#else
+
+#define MLOCK_PAGES 0
+PAGEFLAG_FALSE(Mlocked)
+ SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked)
+
+PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable)
+ SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable)
+ __CLEARPAGEFLAG_NOOP(Unevictable)
+#endif
+
#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
PAGEFLAG(Uncached, uncached)
#else
@@ -326,15 +364,25 @@ static inline void __ClearPageTail(struct page *page)
#endif /* !PAGEFLAGS_EXTENDED */
+#ifdef CONFIG_UNEVICTABLE_LRU
+#define __PG_UNEVICTABLE (1 << PG_unevictable)
+#define __PG_MLOCKED (1 << PG_mlocked)
+#else
+#define __PG_UNEVICTABLE 0
+#define __PG_MLOCKED 0
+#endif
+
#define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
1 << PG_buddy | 1 << PG_writeback | \
- 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active)
+ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
+ __PG_UNEVICTABLE | __PG_MLOCKED)
/*
* Flags checked in bad_page(). Pages on the free list should not have
 * these flags set. If they are, there is a problem.
*/
-#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | 1 << PG_reclaim | 1 << PG_dirty)
+#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \
+ 1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked)
/*
* Flags checked when a page is freed. Pages being freed should not have
@@ -347,7 +395,8 @@ static inline void __ClearPageTail(struct page *page)
 * Pages being prepped should not have these flags set. If they are, there
* is a problem.
*/
-#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | 1 << PG_reserved | 1 << PG_dirty)
+#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \
+ 1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked)
#endif /* !__GENERATING_BOUNDS_H */
#endif /* PAGE_FLAGS_H */
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
new file mode 100644
index 00000000000..0fd39f2231e
--- /dev/null
+++ b/include/linux/page_cgroup.h
@@ -0,0 +1,103 @@
+#ifndef __LINUX_PAGE_CGROUP_H
+#define __LINUX_PAGE_CGROUP_H
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#include <linux/bit_spinlock.h>
+/*
+ * Page Cgroup can be considered as an extended mem_map.
+ * A page_cgroup is associated with every page descriptor; it records
+ * which cgroup the page is charged to. All page_cgroups are allocated
+ * at boot or at memory-hotplug time, so the page_cgroup for a given
+ * pfn always exists.
+ */
+struct page_cgroup {
+ unsigned long flags;
+ struct mem_cgroup *mem_cgroup;
+ struct page *page;
+ struct list_head lru; /* per cgroup LRU list */
+};
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat);
+void __init page_cgroup_init(void);
+struct page_cgroup *lookup_page_cgroup(struct page *page);
+
+enum {
+ /* flags for mem_cgroup */
+ PCG_LOCK, /* page cgroup is locked */
+ PCG_CACHE, /* charged as cache */
+ PCG_USED, /* this object is in use. */
+ /* flags for LRU placement */
+ PCG_ACTIVE, /* page is active in this cgroup */
+ PCG_FILE, /* page is file system backed */
+ PCG_UNEVICTABLE, /* page is unevictable */
+};
+
+#define TESTPCGFLAG(uname, lname) \
+static inline int PageCgroup##uname(struct page_cgroup *pc) \
+ { return test_bit(PCG_##lname, &pc->flags); }
+
+#define SETPCGFLAG(uname, lname) \
+static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
+ { set_bit(PCG_##lname, &pc->flags); }
+
+#define CLEARPCGFLAG(uname, lname) \
+static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
+ { clear_bit(PCG_##lname, &pc->flags); }
+
+/* Cache flag is set only once (at allocation) */
+TESTPCGFLAG(Cache, CACHE)
+
+TESTPCGFLAG(Used, USED)
+CLEARPCGFLAG(Used, USED)
+
+/* LRU management flags (from global-lru definition) */
+TESTPCGFLAG(File, FILE)
+SETPCGFLAG(File, FILE)
+CLEARPCGFLAG(File, FILE)
+
+TESTPCGFLAG(Active, ACTIVE)
+SETPCGFLAG(Active, ACTIVE)
+CLEARPCGFLAG(Active, ACTIVE)
+
+TESTPCGFLAG(Unevictable, UNEVICTABLE)
+SETPCGFLAG(Unevictable, UNEVICTABLE)
+CLEARPCGFLAG(Unevictable, UNEVICTABLE)
+
+static inline int page_cgroup_nid(struct page_cgroup *pc)
+{
+ return page_to_nid(pc->page);
+}
+
+static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
+{
+ return page_zonenum(pc->page);
+}
+
+static inline void lock_page_cgroup(struct page_cgroup *pc)
+{
+ bit_spin_lock(PCG_LOCK, &pc->flags);
+}
+
+static inline int trylock_page_cgroup(struct page_cgroup *pc)
+{
+ return bit_spin_trylock(PCG_LOCK, &pc->flags);
+}
+
+static inline void unlock_page_cgroup(struct page_cgroup *pc)
+{
+ bit_spin_unlock(PCG_LOCK, &pc->flags);
+}
+
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+struct page_cgroup;
+
+static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+}
+
+static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+ return NULL;
+}
+#endif
+#endif
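As an illustrative sketch (simplified from what the memory controller does elsewhere in this series), a charge path would use the locking and flag helpers above like so:

	/* Simplified, illustrative charge path; not the real memcontrol.c code. */
	static void example_charge(struct page *page, struct mem_cgroup *mem)
	{
		struct page_cgroup *pc = lookup_page_cgroup(page);

		if (unlikely(!pc))
			return;

		lock_page_cgroup(pc);
		if (!PageCgroupUsed(pc)) {
			/* Not yet charged: record the owning cgroup. The real
			 * code also sets the USED bit via pc->flags here. */
			pc->mem_cgroup = mem;
		}
		unlock_page_cgroup(pc);
	}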
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5da31c12101..709742be02f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -32,6 +32,34 @@ static inline void mapping_set_error(struct address_space *mapping, int error)
}
}
+#ifdef CONFIG_UNEVICTABLE_LRU
+#define AS_UNEVICTABLE (__GFP_BITS_SHIFT + 2) /* e.g., ramdisk, SHM_LOCK */
+
+static inline void mapping_set_unevictable(struct address_space *mapping)
+{
+ set_bit(AS_UNEVICTABLE, &mapping->flags);
+}
+
+static inline void mapping_clear_unevictable(struct address_space *mapping)
+{
+ clear_bit(AS_UNEVICTABLE, &mapping->flags);
+}
+
+static inline int mapping_unevictable(struct address_space *mapping)
+{
+ if (likely(mapping))
+ return test_bit(AS_UNEVICTABLE, &mapping->flags);
+ return !!mapping;
+}
+#else
+static inline void mapping_set_unevictable(struct address_space *mapping) { }
+static inline void mapping_clear_unevictable(struct address_space *mapping) { }
+static inline int mapping_unevictable(struct address_space *mapping)
+{
+ return 0;
+}
+#endif
+
static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
{
return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
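A sketch of the intended consumer: marking a shared-memory segment's mapping unevictable on SHM_LOCK so its pages stay off the normal LRUs. The helper below is illustrative only; the real wiring lands in the shmem/ipc code later in this series:

	#include <linux/pagemap.h>

	/* Illustrative only; not the actual SHM_LOCK implementation. */
	static void example_set_shm_lock(struct address_space *mapping, int lock)
	{
		if (lock)
			mapping_set_unevictable(mapping);
		else
			mapping_clear_unevictable(mapping);
	}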
@@ -271,19 +299,19 @@ extern int __lock_page_killable(struct page *page);
extern void __lock_page_nosync(struct page *page);
extern void unlock_page(struct page *page);
-static inline void set_page_locked(struct page *page)
+static inline void __set_page_locked(struct page *page)
{
- set_bit(PG_locked, &page->flags);
+ __set_bit(PG_locked, &page->flags);
}
-static inline void clear_page_locked(struct page *page)
+static inline void __clear_page_locked(struct page *page)
{
- clear_bit(PG_locked, &page->flags);
+ __clear_bit(PG_locked, &page->flags);
}
static inline int trylock_page(struct page *page)
{
- return !test_and_set_bit(PG_locked, &page->flags);
+ return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
}
/*
@@ -410,17 +438,17 @@ extern void __remove_from_page_cache(struct page *page);
/*
* Like add_to_page_cache_locked, but used to add newly allocated pages:
- * the page is new, so we can just run set_page_locked() against it.
+ * the page is new, so we can just run __set_page_locked() against it.
*/
static inline int add_to_page_cache(struct page *page,
struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
{
int error;
- set_page_locked(page);
+ __set_page_locked(page);
error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
if (unlikely(error))
- clear_page_locked(page);
+ __clear_page_locked(page);
return error;
}
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 8eb7fa76c1d..e90a2cb0291 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -23,9 +23,9 @@ struct pagevec {
void __pagevec_release(struct pagevec *pvec);
void __pagevec_release_nonlru(struct pagevec *pvec);
void __pagevec_free(struct pagevec *pvec);
-void __pagevec_lru_add(struct pagevec *pvec);
-void __pagevec_lru_add_active(struct pagevec *pvec);
+void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
void pagevec_strip(struct pagevec *pvec);
+void pagevec_swap_free(struct pagevec *pvec);
unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
pgoff_t start, unsigned nr_pages);
unsigned pagevec_lookup_tag(struct pagevec *pvec,
@@ -81,10 +81,36 @@ static inline void pagevec_free(struct pagevec *pvec)
__pagevec_free(pvec);
}
-static inline void pagevec_lru_add(struct pagevec *pvec)
+static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
+{
+ ____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
+}
+
+static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
+{
+ ____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
+}
+
+static inline void __pagevec_lru_add_file(struct pagevec *pvec)
+{
+ ____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
+}
+
+static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
+{
+ ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
+}
+
+static inline void pagevec_lru_add_file(struct pagevec *pvec)
+{
+ if (pagevec_count(pvec))
+ __pagevec_lru_add_file(pvec);
+}
+
+static inline void pagevec_lru_add_anon(struct pagevec *pvec)
{
if (pagevec_count(pvec))
- __pagevec_lru_add(pvec);
+ __pagevec_lru_add_anon(pvec);
}
#endif /* _LINUX_PAGEVEC_H */
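As a usage sketch, batching pages onto the inactive file LRU with the split helpers looks like this (hypothetical helper, shown for illustration):

	#include <linux/pagevec.h>
	#include <linux/pagemap.h>

	/* Hypothetical helper batching pages onto the inactive-file LRU. */
	static void example_add_file_pages(struct page **pages, int nr)
	{
		struct pagevec pvec;
		int i;

		pagevec_init(&pvec, 0);
		for (i = 0; i < nr; i++) {
			page_cache_get(pages[i]);	/* the pagevec takes a reference */
			if (!pagevec_add(&pvec, pages[i]))
				__pagevec_lru_add_file(&pvec);	/* vector full: flush */
		}
		pagevec_lru_add_file(&pvec);		/* flush any remainder */
	}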
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 98dc6243a70..acf8f24037c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -631,6 +631,8 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i);
int pci_select_bars(struct pci_dev *dev, unsigned long flags);
/* ROM control related routines */
+int pci_enable_rom(struct pci_dev *pdev);
+void pci_disable_rom(struct pci_dev *pdev);
void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size);
void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom);
size_t pci_get_rom_size(void __iomem *rom, size_t size);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index ea7416c901d..22641d5d45d 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -94,7 +94,6 @@ extern void ptrace_notify(int exit_code);
extern void __ptrace_link(struct task_struct *child,
struct task_struct *new_parent);
extern void __ptrace_unlink(struct task_struct *child);
-extern void ptrace_untrace(struct task_struct *child);
#define PTRACE_MODE_READ 1
#define PTRACE_MODE_ATTACH 2
/* Returns 0 on success, -errno on denial. */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index fed6f5e0b41..89f0564b10c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -39,18 +39,6 @@ struct anon_vma {
#ifdef CONFIG_MMU
-extern struct kmem_cache *anon_vma_cachep;
-
-static inline struct anon_vma *anon_vma_alloc(void)
-{
- return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
-}
-
-static inline void anon_vma_free(struct anon_vma *anon_vma)
-{
- kmem_cache_free(anon_vma_cachep, anon_vma);
-}
-
static inline void anon_vma_lock(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
@@ -75,6 +63,9 @@ void anon_vma_unlink(struct vm_area_struct *);
void anon_vma_link(struct vm_area_struct *);
void __anon_vma_link(struct vm_area_struct *);
+extern struct anon_vma *page_lock_anon_vma(struct page *page);
+extern void page_unlock_anon_vma(struct anon_vma *anon_vma);
+
/*
* rmap interfaces called when adding or removing pte of page
*/
@@ -117,6 +108,19 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
*/
int page_mkclean(struct page *);
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * Called in the munlock()/munmap() path to check for other VMAs
+ * holding the page mlocked.
+ */
+int try_to_munlock(struct page *);
+#else
+static inline int try_to_munlock(struct page *page)
+{
+ return 0; /* a.k.a. SWAP_SUCCESS */
+}
+#endif
+
#else /* !CONFIG_MMU */
#define anon_vma_init() do {} while (0)
@@ -140,5 +144,6 @@ static inline int page_mkclean(struct page *page)
#define SWAP_SUCCESS 0
#define SWAP_AGAIN 1
#define SWAP_FAIL 2
+#define SWAP_MLOCK 3
#endif /* _LINUX_RMAP_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c226c7b8294..f52dbd3587a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -403,12 +403,21 @@ extern int get_dumpable(struct mm_struct *mm);
#define MMF_DUMP_MAPPED_PRIVATE 4
#define MMF_DUMP_MAPPED_SHARED 5
#define MMF_DUMP_ELF_HEADERS 6
+#define MMF_DUMP_HUGETLB_PRIVATE 7
+#define MMF_DUMP_HUGETLB_SHARED 8
#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
-#define MMF_DUMP_FILTER_BITS 5
+#define MMF_DUMP_FILTER_BITS 7
#define MMF_DUMP_FILTER_MASK \
(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
#define MMF_DUMP_FILTER_DEFAULT \
- ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED))
+ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\
+ (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
+
+#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
+# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS)
+#else
+# define MMF_DUMP_MASK_DEFAULT_ELF 0
+#endif
struct sighand_struct {
atomic_t count;
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index a1783b229ef..dc50bcc282a 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -60,6 +60,19 @@ static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask)
return seq_bitmap(m, mask->bits, MAX_NUMNODES);
}
+int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
+ unsigned int nr_bits);
+
+static inline int seq_cpumask_list(struct seq_file *m, cpumask_t *mask)
+{
+ return seq_bitmap_list(m, mask->bits, NR_CPUS);
+}
+
+static inline int seq_nodemask_list(struct seq_file *m, nodemask_t *mask)
+{
+ return seq_bitmap_list(m, mask->bits, MAX_NUMNODES);
+}
+
int single_open(struct file *, int (*)(struct seq_file *, void *), void *);
int single_release(struct inode *, struct file *);
void *__seq_open_private(struct file *, const struct seq_operations *, int);
diff --git a/include/linux/swab.h b/include/linux/swab.h
index 270d5c208a8..bbed279f3b3 100644
--- a/include/linux/swab.h
+++ b/include/linux/swab.h
@@ -47,8 +47,6 @@ static inline __attribute_const__ __u16 ___swab16(__u16 val)
{
#ifdef __arch_swab16
return __arch_swab16(val);
-#elif defined(__arch_swab16p)
- return __arch_swab16p(&val);
#else
return __const_swab16(val);
#endif
@@ -58,8 +56,6 @@ static inline __attribute_const__ __u32 ___swab32(__u32 val)
{
#ifdef __arch_swab32
return __arch_swab32(val);
-#elif defined(__arch_swab32p)
- return __arch_swab32p(&val);
#else
return __const_swab32(val);
#endif
@@ -69,8 +65,6 @@ static inline __attribute_const__ __u64 ___swab64(__u64 val)
{
#ifdef __arch_swab64
return __arch_swab64(val);
-#elif defined(__arch_swab64p)
- return __arch_swab64p(&val);
#elif defined(__SWAB_64_THRU_32__)
__u32 h = val >> 32;
__u32 l = val & ((1ULL << 32) - 1);
@@ -84,8 +78,6 @@ static inline __attribute_const__ __u32 ___swahw32(__u32 val)
{
#ifdef __arch_swahw32
return __arch_swahw32(val);
-#elif defined(__arch_swahw32p)
- return __arch_swahw32p(&val);
#else
return __const_swahw32(val);
#endif
@@ -95,8 +87,6 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val)
{
#ifdef __arch_swahb32
return __arch_swahb32(val);
-#elif defined(__arch_swahb32p)
- return __arch_swahb32p(&val);
#else
return __const_swahb32(val);
#endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index de40f169a4e..a3af95b2cb6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -7,6 +7,7 @@
#include <linux/list.h>
#include <linux/memcontrol.h>
#include <linux/sched.h>
+#include <linux/node.h>
#include <asm/atomic.h>
#include <asm/page.h>
@@ -171,8 +172,10 @@ extern unsigned int nr_free_pagecache_pages(void);
/* linux/mm/swap.c */
-extern void lru_cache_add(struct page *);
-extern void lru_cache_add_active(struct page *);
+extern void __lru_cache_add(struct page *, enum lru_list lru);
+extern void lru_cache_add_lru(struct page *, enum lru_list lru);
+extern void lru_cache_add_active_or_unevictable(struct page *,
+ struct vm_area_struct *);
extern void activate_page(struct page *);
extern void mark_page_accessed(struct page *);
extern void lru_add_drain(void);
@@ -180,12 +183,38 @@ extern int lru_add_drain_all(void);
extern void rotate_reclaimable_page(struct page *page);
extern void swap_setup(void);
+extern void add_page_to_unevictable_list(struct page *page);
+
+/**
+ * lru_cache_add_anon - add a page to the inactive anon LRU list
+ * @page: the page to add
+ */
+static inline void lru_cache_add_anon(struct page *page)
+{
+ __lru_cache_add(page, LRU_INACTIVE_ANON);
+}
+
+static inline void lru_cache_add_active_anon(struct page *page)
+{
+ __lru_cache_add(page, LRU_ACTIVE_ANON);
+}
+
+static inline void lru_cache_add_file(struct page *page)
+{
+ __lru_cache_add(page, LRU_INACTIVE_FILE);
+}
+
+static inline void lru_cache_add_active_file(struct page *page)
+{
+ __lru_cache_add(page, LRU_ACTIVE_FILE);
+}
+
/* linux/mm/vmscan.c */
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask);
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
gfp_t gfp_mask);
-extern int __isolate_lru_page(struct page *page, int mode);
+extern int __isolate_lru_page(struct page *page, int mode, int file);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
extern int remove_mapping(struct address_space *mapping, struct page *page);
@@ -204,6 +233,34 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
}
#endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+extern int page_evictable(struct page *page, struct vm_area_struct *vma);
+extern void scan_mapping_unevictable_pages(struct address_space *);
+
+extern unsigned long scan_unevictable_pages;
+extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
+ void __user *, size_t *, loff_t *);
+extern int scan_unevictable_register_node(struct node *node);
+extern void scan_unevictable_unregister_node(struct node *node);
+#else
+static inline int page_evictable(struct page *page,
+ struct vm_area_struct *vma)
+{
+ return 1;
+}
+
+static inline void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+}
+
+static inline int scan_unevictable_register_node(struct node *node)
+{
+ return 0;
+}
+
+static inline void scan_unevictable_unregister_node(struct node *node) { }
+#endif
+
extern int kswapd_run(int nid);
#ifdef CONFIG_MMU
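A sketch of the putback decision this interface enables (simplified; the real logic lives in mm/vmscan.c):

	#include <linux/swap.h>

	/* Simplified putback decision; illustrative, not vmscan.c itself. */
	static void example_putback(struct page *page)
	{
		if (unlikely(!page_evictable(page, NULL)))
			add_page_to_unevictable_list(page);	/* culled */
		else
			lru_cache_add_lru(page, LRU_INACTIVE_ANON);	/* normal LRU */
	}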
@@ -251,6 +308,7 @@ extern sector_t swapdev_block(int, pgoff_t);
extern struct swap_info_struct *get_swap_info_struct(unsigned);
extern int can_share_swap_page(struct page *);
extern int remove_exclusive_swap_page(struct page *);
+extern int remove_exclusive_swap_page_ref(struct page *);
struct backing_dev_info;
/* linux/mm/thrash.c */
@@ -339,6 +397,11 @@ static inline int remove_exclusive_swap_page(struct page *p)
return 0;
}
+static inline int remove_exclusive_swap_page_ref(struct page *page)
+{
+ return 0;
+}
+
static inline swp_entry_t get_swap_page(void)
{
swp_entry_t entry;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b330e289d71..9d68fed50f1 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -21,8 +21,9 @@ struct kobject;
struct module;
/* FIXME
- * The *owner field is no longer used, but leave around
- * until the tree gets cleaned up fully.
+ * The *owner field is no longer used.
+ * The x86 tree has been cleaned up; the owner
+ * attribute is kept only for the other arches.
*/
struct attribute {
const char *name;
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 328eb402272..4c28c4d564e 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -2,6 +2,7 @@
#define _LINUX_VMALLOC_H
#include <linux/spinlock.h>
+#include <linux/init.h>
#include <asm/page.h> /* pgprot_t */
struct vm_area_struct; /* vma defining user mapping in mm_types.h */
@@ -23,7 +24,6 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */
#endif
struct vm_struct {
- /* keep next,addr,size together to speedup lookups */
struct vm_struct *next;
void *addr;
unsigned long size;
@@ -37,6 +37,19 @@ struct vm_struct {
/*
* Highlevel APIs for driver use
*/
+extern void vm_unmap_ram(const void *mem, unsigned int count);
+extern void *vm_map_ram(struct page **pages, unsigned int count,
+ int node, pgprot_t prot);
+extern void vm_unmap_aliases(void);
+
+#ifdef CONFIG_MMU
+extern void __init vmalloc_init(void);
+#else
+static inline void vmalloc_init(void)
+{
+}
+#endif
+
extern void *vmalloc(unsigned long size);
extern void *vmalloc_user(unsigned long size);
extern void *vmalloc_node(unsigned long size, int node);
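As a usage sketch of the new transient-mapping API (hypothetical helpers; PAGE_KERNEL comes from the arch pgtable headers):

	#include <linux/vmalloc.h>

	/* Map a caller-provided page array into a transient virtual range. */
	static void *example_map_pages(struct page **pages, unsigned int count)
	{
		/* -1: no NUMA node preference */
		return vm_map_ram(pages, count, -1, PAGE_KERNEL);
	}

	static void example_unmap_pages(void *mem, unsigned int count)
	{
		vm_unmap_ram(mem, count);
	}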
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 58334d43951..9cd3ab0f554 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -41,6 +41,16 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
#ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
#endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+ UNEVICTABLE_PGCULLED, /* culled to noreclaim list */
+ UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */
+ UNEVICTABLE_PGRESCUED, /* rescued from noreclaim list */
+ UNEVICTABLE_PGMLOCKED,
+ UNEVICTABLE_PGMUNLOCKED,
+ UNEVICTABLE_PGCLEARED, /* on COW, page truncate */
+ UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */
+ UNEVICTABLE_MLOCKFREED,
+#endif
NR_VM_EVENT_ITEMS
};
@@ -159,6 +169,16 @@ static inline unsigned long zone_page_state(struct zone *zone,
return x;
}
+extern unsigned long global_lru_pages(void);
+
+static inline unsigned long zone_lru_pages(struct zone *zone)
+{
+ return (zone_page_state(zone, NR_ACTIVE_ANON)
+ + zone_page_state(zone, NR_ACTIVE_FILE)
+ + zone_page_state(zone, NR_INACTIVE_ANON)
+ + zone_page_state(zone, NR_INACTIVE_FILE));
+}
+
#ifdef CONFIG_NUMA
/*
* Determine the per node value of a stat item. This function
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index 0cb63ed2c1f..b8093971ccb 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -2,9 +2,9 @@
#define __NETNS_X_TABLES_H
#include <linux/list.h>
-#include <linux/net.h>
+#include <linux/netfilter.h>
struct netns_xt {
- struct list_head tables[NPROTO];
+ struct list_head tables[NFPROTO_NUMPROTO];
};
#endif
diff --git a/init/Kconfig b/init/Kconfig
index 5ceff3249a2..8828ed0b205 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -299,6 +299,13 @@ config CGROUP_NS
for instance virtual servers and checkpoint/restart
jobs.
+config CGROUP_FREEZER
+ bool "control group freezer subsystem"
+ depends on CGROUPS
+ help
+ Provides a way to freeze and unfreeze all tasks in a
+ cgroup.
+
config CGROUP_DEVICE
bool "Device controller for cgroups"
depends on CGROUPS && EXPERIMENTAL
diff --git a/init/main.c b/init/main.c
index 27f6bf6108e..4371d11721f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -27,6 +27,7 @@
#include <linux/gfp.h>
#include <linux/percpu.h>
#include <linux/kmod.h>
+#include <linux/vmalloc.h>
#include <linux/kernel_stat.h>
#include <linux/start_kernel.h>
#include <linux/security.h>
@@ -642,6 +643,7 @@ asmlinkage void __init start_kernel(void)
initrd_start = 0;
}
#endif
+ vmalloc_init();
vfs_caches_init_early();
cpuset_init_early();
mem_init();
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 96fb36cd987..68eb857cfde 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -52,6 +52,14 @@
#define HARD_MSGMAX (131072/sizeof(void*))
#define DFLT_MSGSIZEMAX 8192 /* max message size */
+/*
+ * Define the ranges various user-specified maximum values can
+ * be set to.
+ */
+#define MIN_MSGMAX 1 /* min value for msg_max */
+#define MAX_MSGMAX HARD_MSGMAX /* max value for msg_max */
+#define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */
+#define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */
struct ext_wait_queue { /* queue of sleeping tasks */
struct task_struct *task;
@@ -134,8 +142,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
info->qsize = 0;
info->user = NULL; /* set when all is ok */
memset(&info->attr, 0, sizeof(info->attr));
- info->attr.mq_maxmsg = DFLT_MSGMAX;
- info->attr.mq_msgsize = DFLT_MSGSIZEMAX;
+ info->attr.mq_maxmsg = msg_max;
+ info->attr.mq_msgsize = msgsize_max;
if (attr) {
info->attr.mq_maxmsg = attr->mq_maxmsg;
info->attr.mq_msgsize = attr->mq_msgsize;
@@ -1191,11 +1199,11 @@ static struct file_system_type mqueue_fs_type = {
.kill_sb = kill_litter_super,
};
-static int msg_max_limit_min = DFLT_MSGMAX;
-static int msg_max_limit_max = HARD_MSGMAX;
+static int msg_max_limit_min = MIN_MSGMAX;
+static int msg_max_limit_max = MAX_MSGMAX;
-static int msg_maxsize_limit_min = DFLT_MSGSIZEMAX;
-static int msg_maxsize_limit_max = INT_MAX;
+static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
+static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
static ctl_table mq_sysctls[] = {
{
diff --git a/ipc/shm.c b/ipc/shm.c
index e77ec698cf4..0add3fa5f54 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -737,6 +737,10 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
case SHM_LOCK:
case SHM_UNLOCK:
{
+ struct file *uninitialized_var(shm_file);
+
+ lru_add_drain_all(); /* drain pagevecs to lru lists */
+
shp = shm_lock_check(ns, shmid);
if (IS_ERR(shp)) {
err = PTR_ERR(shp);
diff --git a/kernel/Kconfig.freezer b/kernel/Kconfig.freezer
new file mode 100644
index 00000000000..a3bb4cb5253
--- /dev/null
+++ b/kernel/Kconfig.freezer
@@ -0,0 +1,2 @@
+config FREEZER
+ def_bool PM_SLEEP || CGROUP_FREEZER
diff --git a/kernel/Makefile b/kernel/Makefile
index 4e1d7df7c3e..066550aa61c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
CFLAGS_REMOVE_sched.o = -mno-spe -pg
endif
+obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
@@ -55,6 +56,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup.o
obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
+obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
obj-$(CONFIG_UTS_NS) += utsname.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c6e1c17e6d..046c1609606 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
struct cg_cgroup_link *link;
struct cg_cgroup_link *saved_link;
- write_lock(&css_set_lock);
hlist_del(&cg->hlist);
css_set_count--;
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
list_del(&link->cgrp_link_list);
kfree(link);
}
-
- write_unlock(&css_set_lock);
}
-static void __release_css_set(struct kref *k, int taskexit)
+static void __put_css_set(struct css_set *cg, int taskexit)
{
int i;
- struct css_set *cg = container_of(k, struct css_set, ref);
-
+ /*
+ * Ensure that the refcount doesn't hit zero while any readers
+ * can see it. Similar to atomic_dec_and_lock(), but for an
+ * rwlock.
+ */
+ if (atomic_add_unless(&cg->refcount, -1, 1))
+ return;
+ write_lock(&css_set_lock);
+ if (!atomic_dec_and_test(&cg->refcount)) {
+ write_unlock(&css_set_lock);
+ return;
+ }
unlink_css_set(cg);
+ write_unlock(&css_set_lock);
rcu_read_lock();
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
kfree(cg);
}
-static void release_css_set(struct kref *k)
-{
- __release_css_set(k, 0);
-}
-
-static void release_css_set_taskexit(struct kref *k)
-{
- __release_css_set(k, 1);
-}
-
/*
* refcounted get/put for css_set objects
*/
static inline void get_css_set(struct css_set *cg)
{
- kref_get(&cg->ref);
+ atomic_inc(&cg->refcount);
}
static inline void put_css_set(struct css_set *cg)
{
- kref_put(&cg->ref, release_css_set);
+ __put_css_set(cg, 0);
}
static inline void put_css_set_taskexit(struct css_set *cg)
{
- kref_put(&cg->ref, release_css_set_taskexit);
+ __put_css_set(cg, 1);
}
/*
@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
return NULL;
}
- kref_init(&res->ref);
+ atomic_set(&res->refcount, 1);
INIT_LIST_HEAD(&res->cg_links);
INIT_LIST_HEAD(&res->tasks);
INIT_HLIST_NODE(&res->hlist);
@@ -870,6 +868,14 @@ static struct super_operations cgroup_ops = {
.remount_fs = cgroup_remount,
};
+static void init_cgroup_housekeeping(struct cgroup *cgrp)
+{
+ INIT_LIST_HEAD(&cgrp->sibling);
+ INIT_LIST_HEAD(&cgrp->children);
+ INIT_LIST_HEAD(&cgrp->css_sets);
+ INIT_LIST_HEAD(&cgrp->release_list);
+ init_rwsem(&cgrp->pids_mutex);
+}
static void init_cgroup_root(struct cgroupfs_root *root)
{
struct cgroup *cgrp = &root->top_cgroup;
@@ -878,10 +884,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
root->number_of_cgroups = 1;
cgrp->root = root;
cgrp->top_cgroup = cgrp;
- INIT_LIST_HEAD(&cgrp->sibling);
- INIT_LIST_HEAD(&cgrp->children);
- INIT_LIST_HEAD(&cgrp->css_sets);
- INIT_LIST_HEAD(&cgrp->release_list);
+ init_cgroup_housekeeping(cgrp);
}
static int cgroup_test_super(struct super_block *sb, void *data)
@@ -1728,7 +1731,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
read_lock(&css_set_lock);
list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
- count += atomic_read(&link->cg->ref.refcount);
+ count += atomic_read(&link->cg->refcount);
}
read_unlock(&css_set_lock);
return count;
@@ -1997,16 +2000,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
* but we cannot guarantee that the information we produce is correct
* unless we produce it entirely atomically.
*
- * Upon tasks file open(), a struct ctr_struct is allocated, that
- * will have a pointer to an array (also allocated here). The struct
- * ctr_struct * is stored in file->private_data. Its resources will
- * be freed by release() when the file is closed. The array is used
- * to sprintf the PIDs and then used by read().
*/
-struct ctr_struct {
- char *buf;
- int bufsz;
-};
/*
* Load into 'pidarray' up to 'npids' of the tasks using cgroup
@@ -2088,42 +2082,132 @@ static int cmppid(const void *a, const void *b)
return *(pid_t *)a - *(pid_t *)b;
}
+
/*
- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
- * decimal pids in 'buf'. Don't write more than 'sz' chars, but return
- * count 'cnt' of how many chars would be written if buf were large enough.
+ * seq_file methods for the "tasks" file. The seq_file position is the
+ * next pid to display; the seq_file iterator is a pointer to the pid
+ * in the cgroup->tasks_pids array.
*/
-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+
+static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
{
- int cnt = 0;
- int i;
+ /*
+ * Initially we receive a position value that corresponds to
+ * one more than the last pid shown (or 0 on the first call or
+ * after a seek to the start). Use a binary search to find the
+ * next pid to display, if any.
+ */
+ struct cgroup *cgrp = s->private;
+ int index = 0, pid = *pos;
+ int *iter;
- for (i = 0; i < npids; i++)
- cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
- return cnt;
+ down_read(&cgrp->pids_mutex);
+ if (pid) {
+ int end = cgrp->pids_length;
+ int i;
+ while (index < end) {
+ int mid = (index + end) / 2;
+ if (cgrp->tasks_pids[mid] == pid) {
+ index = mid;
+ break;
+ } else if (cgrp->tasks_pids[mid] <= pid)
+ index = mid + 1;
+ else
+ end = mid;
+ }
+ }
+ /* If we're off the end of the array, we're done */
+ if (index >= cgrp->pids_length)
+ return NULL;
+ /* Update the abstract position to be the actual pid that we found */
+ iter = cgrp->tasks_pids + index;
+ *pos = *iter;
+ return iter;
+}
+
+static void cgroup_tasks_stop(struct seq_file *s, void *v)
+{
+ struct cgroup *cgrp = s->private;
+ up_read(&cgrp->pids_mutex);
}
+static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct cgroup *cgrp = s->private;
+ int *p = v;
+ int *end = cgrp->tasks_pids + cgrp->pids_length;
+
+ /*
+ * Advance to the next pid in the array. If this goes off the
+ * end, we're done
+ */
+ p++;
+ if (p >= end) {
+ return NULL;
+ } else {
+ *pos = *p;
+ return p;
+ }
+}
+
+static int cgroup_tasks_show(struct seq_file *s, void *v)
+{
+ return seq_printf(s, "%d\n", *(int *)v);
+}
+
+static struct seq_operations cgroup_tasks_seq_operations = {
+ .start = cgroup_tasks_start,
+ .stop = cgroup_tasks_stop,
+ .next = cgroup_tasks_next,
+ .show = cgroup_tasks_show,
+};
+
+static void release_cgroup_pid_array(struct cgroup *cgrp)
+{
+ down_write(&cgrp->pids_mutex);
+ BUG_ON(!cgrp->pids_use_count);
+ if (!--cgrp->pids_use_count) {
+ kfree(cgrp->tasks_pids);
+ cgrp->tasks_pids = NULL;
+ cgrp->pids_length = 0;
+ }
+ up_write(&cgrp->pids_mutex);
+}
+
+static int cgroup_tasks_release(struct inode *inode, struct file *file)
+{
+ struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+ if (!(file->f_mode & FMODE_READ))
+ return 0;
+
+ release_cgroup_pid_array(cgrp);
+ return seq_release(inode, file);
+}
+
+static struct file_operations cgroup_tasks_operations = {
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = cgroup_file_write,
+ .release = cgroup_tasks_release,
+};
+
/*
- * Handle an open on 'tasks' file. Prepare a buffer listing the
+ * Handle an open on 'tasks' file. Prepare an array containing the
* process id's of tasks currently attached to the cgroup being opened.
- *
- * Does not require any specific cgroup mutexes, and does not take any.
*/
+
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
- struct ctr_struct *ctr;
pid_t *pidarray;
int npids;
- char c;
+ int retval;
+ /* Nothing to do for write-only files */
if (!(file->f_mode & FMODE_READ))
return 0;
- ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
- if (!ctr)
- goto err0;
-
/*
* If cgroup gets more users after we read count, we won't have
* enough space - tough. This race is indistinguishable to the
@@ -2131,57 +2215,31 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
* show up until sometime later on.
*/
npids = cgroup_task_count(cgrp);
- if (npids) {
- pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
- if (!pidarray)
- goto err1;
-
- npids = pid_array_load(pidarray, npids, cgrp);
- sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
- /* Call pid_array_to_buf() twice, first just to get bufsz */
- ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
- ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
- if (!ctr->buf)
- goto err2;
- ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
-
- kfree(pidarray);
- } else {
- ctr->buf = NULL;
- ctr->bufsz = 0;
- }
- file->private_data = ctr;
- return 0;
-
-err2:
- kfree(pidarray);
-err1:
- kfree(ctr);
-err0:
- return -ENOMEM;
-}
+ pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+ if (!pidarray)
+ return -ENOMEM;
+ npids = pid_array_load(pidarray, npids, cgrp);
+ sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
- struct cftype *cft,
- struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
-{
- struct ctr_struct *ctr = file->private_data;
+ /*
+ * Store the array in the cgroup, freeing the old
+ * array if necessary
+ */
+ down_write(&cgrp->pids_mutex);
+ kfree(cgrp->tasks_pids);
+ cgrp->tasks_pids = pidarray;
+ cgrp->pids_length = npids;
+ cgrp->pids_use_count++;
+ up_write(&cgrp->pids_mutex);
- return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
-}
+ file->f_op = &cgroup_tasks_operations;
-static int cgroup_tasks_release(struct inode *unused_inode,
- struct file *file)
-{
- struct ctr_struct *ctr;
-
- if (file->f_mode & FMODE_READ) {
- ctr = file->private_data;
- kfree(ctr->buf);
- kfree(ctr);
+ retval = seq_open(file, &cgroup_tasks_seq_operations);
+ if (retval) {
+ release_cgroup_pid_array(cgrp);
+ return retval;
}
+ ((struct seq_file *)file->private_data)->private = cgrp;
return 0;
}
@@ -2210,7 +2268,6 @@ static struct cftype files[] = {
{
.name = "tasks",
.open = cgroup_tasks_open,
- .read = cgroup_tasks_read,
.write_u64 = cgroup_tasks_write,
.release = cgroup_tasks_release,
.private = FILE_TASKLIST,
@@ -2300,10 +2357,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
mutex_lock(&cgroup_mutex);
- INIT_LIST_HEAD(&cgrp->sibling);
- INIT_LIST_HEAD(&cgrp->children);
- INIT_LIST_HEAD(&cgrp->css_sets);
- INIT_LIST_HEAD(&cgrp->release_list);
+ init_cgroup_housekeeping(cgrp);
cgrp->parent = parent;
cgrp->root = parent->root;
@@ -2495,8 +2549,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
int __init cgroup_init_early(void)
{
int i;
- kref_init(&init_css_set.ref);
- kref_get(&init_css_set.ref);
+ atomic_set(&init_css_set.refcount, 1);
INIT_LIST_HEAD(&init_css_set.cg_links);
INIT_LIST_HEAD(&init_css_set.tasks);
INIT_HLIST_NODE(&init_css_set.hlist);
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
index c3dc3aba4c0..daca6209202 100644
--- a/kernel/cgroup_debug.c
+++ b/kernel/cgroup_debug.c
@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
u64 count;
rcu_read_lock();
- count = atomic_read(&current->cgroups->ref.refcount);
+ count = atomic_read(&current->cgroups->refcount);
rcu_read_unlock();
return count;
}
@@ -90,7 +90,7 @@ static struct cftype files[] = {
{
.name = "releasable",
.read_u64 = releasable_read,
- }
+ },
};
static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
new file mode 100644
index 00000000000..e9505695449
--- /dev/null
+++ b/kernel/cgroup_freezer.c
@@ -0,0 +1,379 @@
+/*
+ * cgroup_freezer.c - control group freezer subsystem
+ *
+ * Copyright IBM Corporation, 2007
+ *
+ * Author : Cedric Le Goater <clg@fr.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/freezer.h>
+#include <linux/seq_file.h>
+
+enum freezer_state {
+ CGROUP_THAWED = 0,
+ CGROUP_FREEZING,
+ CGROUP_FROZEN,
+};
+
+struct freezer {
+ struct cgroup_subsys_state css;
+ enum freezer_state state;
+ spinlock_t lock; /* protects _writes_ to state */
+};
+
+static inline struct freezer *cgroup_freezer(
+ struct cgroup *cgroup)
+{
+ return container_of(
+ cgroup_subsys_state(cgroup, freezer_subsys_id),
+ struct freezer, css);
+}
+
+static inline struct freezer *task_freezer(struct task_struct *task)
+{
+ return container_of(task_subsys_state(task, freezer_subsys_id),
+ struct freezer, css);
+}
+
+int cgroup_frozen(struct task_struct *task)
+{
+ struct freezer *freezer;
+ enum freezer_state state;
+
+ task_lock(task);
+ freezer = task_freezer(task);
+ state = freezer->state;
+ task_unlock(task);
+
+ return state == CGROUP_FROZEN;
+}
+
+/*
+ * cgroups_write_string() limits the size of freezer state strings to
+ * CGROUP_LOCAL_BUFFER_SIZE
+ */
+static const char *freezer_state_strs[] = {
+ "THAWED",
+ "FREEZING",
+ "FROZEN",
+};
+
+/*
+ * State diagram
+ * Transitions are caused by userspace writes to the freezer.state file.
+ * The values in parentheses are state labels; the rest are edge labels.
+ *
+ * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
+ * ^ ^ | |
+ * | \_______THAWED_______/ |
+ * \__________________________THAWED____________/
+ */
+
+struct cgroup_subsys freezer_subsys;
+
+/* Locks taken and their ordering
+ * ------------------------------
+ * css_set_lock
+ * cgroup_mutex (AKA cgroup_lock)
+ * task->alloc_lock (AKA task_lock)
+ * freezer->lock
+ * task->sighand->siglock
+ *
+ * cgroup code forces css_set_lock to be taken before task->alloc_lock
+ *
+ * freezer_create(), freezer_destroy():
+ * cgroup_mutex [ by cgroup core ]
+ *
+ * can_attach():
+ * cgroup_mutex
+ *
+ * cgroup_frozen():
+ * task->alloc_lock (to get task's cgroup)
+ *
+ * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
+ * task->alloc_lock (to get task's cgroup)
+ * freezer->lock
+ * sighand->siglock (if the cgroup is freezing)
+ *
+ * freezer_read():
+ * cgroup_mutex
+ * freezer->lock
+ * read_lock css_set_lock (cgroup iterator start)
+ *
+ * freezer_write() (freeze):
+ * cgroup_mutex
+ * freezer->lock
+ * read_lock css_set_lock (cgroup iterator start)
+ * sighand->siglock
+ *
+ * freezer_write() (unfreeze):
+ * cgroup_mutex
+ * freezer->lock
+ * read_lock css_set_lock (cgroup iterator start)
+ * task->alloc_lock (to prevent races with freeze_task())
+ * sighand->siglock
+ */
+static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
+ struct cgroup *cgroup)
+{
+ struct freezer *freezer;
+
+ freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
+ if (!freezer)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&freezer->lock);
+ freezer->state = CGROUP_THAWED;
+ return &freezer->css;
+}
+
+static void freezer_destroy(struct cgroup_subsys *ss,
+ struct cgroup *cgroup)
+{
+ kfree(cgroup_freezer(cgroup));
+}
+
+/* Task is frozen, or will freeze as soon as it is next woken */
+static bool is_task_frozen_enough(struct task_struct *task)
+{
+ return frozen(task) ||
+ (task_is_stopped_or_traced(task) && freezing(task));
+}
+
+/*
+ * The call to cgroup_lock() in the freezer.state write method prevents
+ * a write to that file racing against an attach, and hence the
+ * can_attach() result will remain valid until the attach completes.
+ */
+static int freezer_can_attach(struct cgroup_subsys *ss,
+ struct cgroup *new_cgroup,
+ struct task_struct *task)
+{
+ struct freezer *freezer;
+ int retval;
+
+ /* Anything frozen can't move or be moved to/from */
+
+ if (is_task_frozen_enough(task))
+ return -EBUSY;
+
+ freezer = cgroup_freezer(new_cgroup);
+ if (freezer->state == CGROUP_FROZEN)
+ return -EBUSY;
+
+ retval = 0;
+ task_lock(task);
+ freezer = task_freezer(task);
+ if (freezer->state == CGROUP_FROZEN)
+ retval = -EBUSY;
+ task_unlock(task);
+ return retval;
+}
+
+static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
+{
+ struct freezer *freezer;
+
+ task_lock(task);
+ freezer = task_freezer(task);
+ task_unlock(task);
+
+ BUG_ON(freezer->state == CGROUP_FROZEN);
+ spin_lock_irq(&freezer->lock);
+ /* Locking avoids race with FREEZING -> THAWED transitions. */
+ if (freezer->state == CGROUP_FREEZING)
+ freeze_task(task, true);
+ spin_unlock_irq(&freezer->lock);
+}
+
+/*
+ * caller must hold freezer->lock
+ */
+static void update_freezer_state(struct cgroup *cgroup,
+ struct freezer *freezer)
+{
+ struct cgroup_iter it;
+ struct task_struct *task;
+ unsigned int nfrozen = 0, ntotal = 0;
+
+ cgroup_iter_start(cgroup, &it);
+ while ((task = cgroup_iter_next(cgroup, &it))) {
+ ntotal++;
+ if (is_task_frozen_enough(task))
+ nfrozen++;
+ }
+
+ /*
+ * Transitioning to FROZEN only when no new tasks can be added
+ * ensures that we never remain in the FROZEN state while unfrozen
+ * tasks exist.
+ */
+ if (nfrozen == ntotal)
+ freezer->state = CGROUP_FROZEN;
+ else if (nfrozen > 0)
+ freezer->state = CGROUP_FREEZING;
+ else
+ freezer->state = CGROUP_THAWED;
+ cgroup_iter_end(cgroup, &it);
+}
+
+static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
+ struct seq_file *m)
+{
+ struct freezer *freezer;
+ enum freezer_state state;
+
+ if (!cgroup_lock_live_group(cgroup))
+ return -ENODEV;
+
+ freezer = cgroup_freezer(cgroup);
+ spin_lock_irq(&freezer->lock);
+ state = freezer->state;
+ if (state == CGROUP_FREEZING) {
+ /* We change from FREEZING to FROZEN lazily if the cgroup was
+ * only partially frozen when we exited the write. */
+ update_freezer_state(cgroup, freezer);
+ state = freezer->state;
+ }
+ spin_unlock_irq(&freezer->lock);
+ cgroup_unlock();
+
+ seq_puts(m, freezer_state_strs[state]);
+ seq_putc(m, '\n');
+ return 0;
+}
+
+static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+ struct cgroup_iter it;
+ struct task_struct *task;
+ unsigned int num_cant_freeze_now = 0;
+
+ freezer->state = CGROUP_FREEZING;
+ cgroup_iter_start(cgroup, &it);
+ while ((task = cgroup_iter_next(cgroup, &it))) {
+ if (!freeze_task(task, true))
+ continue;
+ if (is_task_frozen_enough(task))
+ continue;
+ if (!freezing(task) && !freezer_should_skip(task))
+ num_cant_freeze_now++;
+ }
+ cgroup_iter_end(cgroup, &it);
+
+ return num_cant_freeze_now ? -EBUSY : 0;
+}
+
+static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+ struct cgroup_iter it;
+ struct task_struct *task;
+
+ cgroup_iter_start(cgroup, &it);
+ while ((task = cgroup_iter_next(cgroup, &it))) {
+ int do_wake;
+
+ task_lock(task);
+ do_wake = __thaw_process(task);
+ task_unlock(task);
+ if (do_wake)
+ wake_up_process(task);
+ }
+ cgroup_iter_end(cgroup, &it);
+ freezer->state = CGROUP_THAWED;
+
+ return 0;
+}
+
+static int freezer_change_state(struct cgroup *cgroup,
+ enum freezer_state goal_state)
+{
+ struct freezer *freezer;
+ int retval = 0;
+
+ freezer = cgroup_freezer(cgroup);
+ spin_lock_irq(&freezer->lock);
+ update_freezer_state(cgroup, freezer);
+ if (goal_state == freezer->state)
+ goto out;
+ switch (freezer->state) {
+ case CGROUP_THAWED:
+ retval = try_to_freeze_cgroup(cgroup, freezer);
+ break;
+ case CGROUP_FREEZING:
+ if (goal_state == CGROUP_FROZEN) {
+ /* Userspace is retrying after
+ * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
+ retval = try_to_freeze_cgroup(cgroup, freezer);
+ break;
+ }
+ /* state == FREEZING and goal_state == THAWED, so unfreeze */
+ case CGROUP_FROZEN:
+ retval = unfreeze_cgroup(cgroup, freezer);
+ break;
+ default:
+ break;
+ }
+out:
+ spin_unlock_irq(&freezer->lock);
+
+ return retval;
+}
+
+static int freezer_write(struct cgroup *cgroup,
+ struct cftype *cft,
+ const char *buffer)
+{
+ int retval;
+ enum freezer_state goal_state;
+
+ if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0)
+ goal_state = CGROUP_THAWED;
+ else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
+ goal_state = CGROUP_FROZEN;
+ else
+ return -EIO;
+
+ if (!cgroup_lock_live_group(cgroup))
+ return -ENODEV;
+ retval = freezer_change_state(cgroup, goal_state);
+ cgroup_unlock();
+ return retval;
+}
+
+static struct cftype files[] = {
+ {
+ .name = "state",
+ .read_seq_string = freezer_read,
+ .write_string = freezer_write,
+ },
+};
+
+static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
+{
+ return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
+}
+
+struct cgroup_subsys freezer_subsys = {
+ .name = "freezer",
+ .create = freezer_create,
+ .destroy = freezer_destroy,
+ .populate = freezer_populate,
+ .subsys_id = freezer_subsys_id,
+ .can_attach = freezer_can_attach,
+ .attach = NULL,
+ .fork = freezer_fork,
+ .exit = NULL,
+};
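From the other side of the interface, a userspace sketch of driving the new freezer.state file (hypothetical helper; assumes a freezer hierarchy already mounted at the directory passed in):

	#include <stdio.h>

	/* Hypothetical userspace helper, e.g. after
	 * mount -t cgroup -o freezer freezer /containers */
	static int freeze_cgroup(const char *cgroup_dir)
	{
		char path[256];
		FILE *f;

		snprintf(path, sizeof(path), "%s/freezer.state", cgroup_dir);
		f = fopen(path, "w");
		if (!f)
			return -1;
		/* The write may fail with EBUSY while tasks are still
		 * FREEZING; the state machine above expects a retry. */
		fputs("FROZEN\n", f);
		fclose(f);
		return 0;
	}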
diff --git a/kernel/configs.c b/kernel/configs.c
index 4c345210ed8..abaee684ecb 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -54,9 +54,6 @@
#ifdef CONFIG_IKCONFIG_PROC
-/**************************************************/
-/* globals and useful constants */
-
static ssize_t
ikconfig_read_current(struct file *file, char __user *buf,
size_t len, loff_t * offset)
@@ -71,9 +68,6 @@ static const struct file_operations ikconfig_file_ops = {
.read = ikconfig_read_current,
};
-/***************************************************/
-/* ikconfig_init: start up everything we need to */
-
static int __init ikconfig_init(void)
{
struct proc_dir_entry *entry;
@@ -89,9 +83,6 @@ static int __init ikconfig_init(void)
return 0;
}
-/***************************************************/
-/* ikconfig_cleanup: clean up our mess */
-
static void __exit ikconfig_cleanup(void)
{
remove_proc_entry("config.gz", NULL);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index eab7bd6628e..3e00526f52e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1172,7 +1172,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
{
struct cpuset trialcs;
int err;
- int cpus_nonempty, balance_flag_changed;
+ int balance_flag_changed;
trialcs = *cs;
if (turning_on)
@@ -1184,7 +1184,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
if (err < 0)
return err;
- cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
balance_flag_changed = (is_sched_load_balance(cs) !=
is_sched_load_balance(&trialcs));
@@ -1192,7 +1191,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
cs->flags = trialcs.flags;
mutex_unlock(&callback_mutex);
- if (cpus_nonempty && balance_flag_changed)
+ if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed)
async_rebuild_sched_domains();
return 0;
@@ -2437,19 +2436,15 @@ const struct file_operations proc_cpuset_operations = {
void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
{
seq_printf(m, "Cpus_allowed:\t");
- m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
- task->cpus_allowed);
+ seq_cpumask(m, &task->cpus_allowed);
seq_printf(m, "\n");
seq_printf(m, "Cpus_allowed_list:\t");
- m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count,
- task->cpus_allowed);
+ seq_cpumask_list(m, &task->cpus_allowed);
seq_printf(m, "\n");
seq_printf(m, "Mems_allowed:\t");
- m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
- task->mems_allowed);
+ seq_nodemask(m, &task->mems_allowed);
seq_printf(m, "\n");
seq_printf(m, "Mems_allowed_list:\t");
- m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count,
- task->mems_allowed);
+ seq_nodemask_list(m, &task->mems_allowed);
seq_printf(m, "\n");
}
diff --git a/kernel/freezer.c b/kernel/freezer.c
new file mode 100644
index 00000000000..ba6248b323e
--- /dev/null
+++ b/kernel/freezer.c
@@ -0,0 +1,154 @@
+/*
+ * kernel/freezer.c - Function to freeze a process
+ *
+ * Originally from kernel/power/process.c
+ */
+
+#include <linux/interrupt.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/freezer.h>
+
+/*
+ * freezing is complete, mark current process as frozen
+ */
+static inline void frozen_process(void)
+{
+ if (!unlikely(current->flags & PF_NOFREEZE)) {
+ current->flags |= PF_FROZEN;
+ wmb();
+ }
+ clear_freeze_flag(current);
+}
+
+/* The refrigerator is the place where frozen processes are stored :-). */
+void refrigerator(void)
+{
+ /* Hmm, should we be allowed to suspend when there are realtime
+ processes around? */
+ long save;
+
+ task_lock(current);
+ if (freezing(current)) {
+ frozen_process();
+ task_unlock(current);
+ } else {
+ task_unlock(current);
+ return;
+ }
+ save = current->state;
+ pr_debug("%s entered refrigerator\n", current->comm);
+
+ spin_lock_irq(&current->sighand->siglock);
+ recalc_sigpending(); /* We sent fake signal, clean it up */
+ spin_unlock_irq(&current->sighand->siglock);
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (!frozen(current))
+ break;
+ schedule();
+ }
+ pr_debug("%s left refrigerator\n", current->comm);
+ __set_current_state(save);
+}
+EXPORT_SYMBOL(refrigerator);
+
+static void fake_signal_wake_up(struct task_struct *p)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ signal_wake_up(p, 0);
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+}
+
+/**
+ * freeze_task - send a freeze request to given task
+ * @p: task to send the request to
+ * @sig_only: if set, the request will only be sent if the task has the
+ * PF_FREEZER_NOSIG flag unset
+ * Return value: 'false' if @sig_only is set and the task has
+ * PF_FREEZER_NOSIG set or the task is frozen; 'true' otherwise
+ *
+ * The freeze request is sent by setting the task's TIF_FREEZE flag and
+ * either sending a fake signal to it or waking it up, depending on whether
+ * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
+ * has PF_FREEZER_NOSIG set (i.e. it is a typical kernel thread), its
+ * TIF_FREEZE flag will not be set.
+ */
+bool freeze_task(struct task_struct *p, bool sig_only)
+{
+ /*
+ * We first check if the task is freezing and next if it has already
+ * been frozen to avoid the race with frozen_process() which first marks
+ * the task as frozen and next clears its TIF_FREEZE.
+ */
+ if (!freezing(p)) {
+ rmb();
+ if (frozen(p))
+ return false;
+
+ if (!sig_only || should_send_signal(p))
+ set_freeze_flag(p);
+ else
+ return false;
+ }
+
+ if (should_send_signal(p)) {
+ if (!signal_pending(p))
+ fake_signal_wake_up(p);
+ } else if (sig_only) {
+ return false;
+ } else {
+ wake_up_state(p, TASK_INTERRUPTIBLE);
+ }
+
+ return true;
+}
+
+void cancel_freezing(struct task_struct *p)
+{
+ unsigned long flags;
+
+ if (freezing(p)) {
+ pr_debug(" clean up: %s\n", p->comm);
+ clear_freeze_flag(p);
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ recalc_sigpending_and_wake(p);
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ }
+}
+
+/*
+ * Wake up a frozen process
+ *
+ * task_lock() is needed to prevent the race with refrigerator() which may
+ * occur if the freezing of tasks fails. Namely, without the lock, if the
+ * freezing of tasks failed, thaw_tasks() might have run before a task in
+ * refrigerator() could call frozen_process(), in which case the task would be
+ * frozen and no one would thaw it.
+ */
+int __thaw_process(struct task_struct *p)
+{
+ if (frozen(p)) {
+ p->flags &= ~PF_FROZEN;
+ return 1;
+ }
+ clear_freeze_flag(p);
+ return 0;
+}
+
+int thaw_process(struct task_struct *p)
+{
+ task_lock(p);
+ if (__thaw_process(p) == 1) {
+ task_unlock(p);
+ wake_up_process(p);
+ return 1;
+ }
+ task_unlock(p);
+ return 0;
+}
+EXPORT_SYMBOL(thaw_process);
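
refrigerator() is only entered via try_to_freeze(), so a freezable kernel thread has to opt in and poll for freeze requests itself. A minimal sketch of such a loop (the work and the sleep interval are placeholders):

#include <linux/kthread.h>
#include <linux/freezer.h>

static int my_thread_fn(void *unused)
{
	set_freezable();	/* clear PF_NOFREEZE so freeze_task() can target us */

	while (!kthread_should_stop()) {
		/* enters refrigerator() if TIF_FREEZE has been set on us */
		try_to_freeze();

		/* ... one unit of real work would go here ... */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}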
diff --git a/kernel/kexec.c b/kernel/kexec.c
index aef265325cd..777ac458ac9 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1371,6 +1371,7 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_SYMBOL(node_online_map);
VMCOREINFO_SYMBOL(swapper_pg_dir);
VMCOREINFO_SYMBOL(_stext);
+ VMCOREINFO_SYMBOL(vmlist);
#ifndef CONFIG_NEED_MULTIPLE_NODES
VMCOREINFO_SYMBOL(mem_map);
@@ -1406,6 +1407,7 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_OFFSET(free_area, free_list);
VMCOREINFO_OFFSET(list_head, next);
VMCOREINFO_OFFSET(list_head, prev);
+ VMCOREINFO_OFFSET(vm_struct, addr);
VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
VMCOREINFO_NUMBER(NR_FREE_PAGES);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 96cff2f8710..14ec64fe175 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -171,12 +171,11 @@ EXPORT_SYMBOL(kthread_create);
*/
void kthread_bind(struct task_struct *k, unsigned int cpu)
{
- if (k->state != TASK_UNINTERRUPTIBLE) {
+ /* Must have done schedule() in kthread() before we set_task_cpu */
+ if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) {
WARN_ON(1);
return;
}
- /* Must have done schedule() in kthread() before we set_task_cpu */
- wait_task_inactive(k, 0);
set_task_cpu(k, cpu);
k->cpus_allowed = cpumask_of_cpu(cpu);
k->rt.nr_cpus_allowed = 1;
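
For context, the calling pattern this check guards: kthread_create() leaves the new thread sleeping in TASK_UNINTERRUPTIBLE inside kthread(), which is what wait_task_inactive() now verifies before the binding is applied. A sketch with placeholder names:

#include <linux/err.h>
#include <linux/kthread.h>

static int worker_fn(void *data)
{
	/* ... per-cpu work loop ... */
	return 0;
}

static struct task_struct *start_worker(int cpu)
{
	struct task_struct *t;

	t = kthread_create(worker_fn, NULL, "worker/%d", cpu);
	if (!IS_ERR(t)) {
		kthread_bind(t, cpu);	/* must precede the first wakeup */
		wake_up_process(t);
	}
	return t;
}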
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 278946aecaf..ca634019497 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -28,121 +28,6 @@ static inline int freezeable(struct task_struct * p)
return 1;
}
-/*
- * freezing is complete, mark current process as frozen
- */
-static inline void frozen_process(void)
-{
- if (!unlikely(current->flags & PF_NOFREEZE)) {
- current->flags |= PF_FROZEN;
- wmb();
- }
- clear_freeze_flag(current);
-}
-
-/* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
-{
- /* Hmm, should we be allowed to suspend when there are realtime
- processes around? */
- long save;
-
- task_lock(current);
- if (freezing(current)) {
- frozen_process();
- task_unlock(current);
- } else {
- task_unlock(current);
- return;
- }
- save = current->state;
- pr_debug("%s entered refrigerator\n", current->comm);
-
- spin_lock_irq(&current->sighand->siglock);
- recalc_sigpending(); /* We sent fake signal, clean it up */
- spin_unlock_irq(&current->sighand->siglock);
-
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!frozen(current))
- break;
- schedule();
- }
- pr_debug("%s left refrigerator\n", current->comm);
- __set_current_state(save);
-}
-
-static void fake_signal_wake_up(struct task_struct *p)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&p->sighand->siglock, flags);
- signal_wake_up(p, 0);
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
-}
-
-static inline bool should_send_signal(struct task_struct *p)
-{
- return !(p->flags & PF_FREEZER_NOSIG);
-}
-
-/**
- * freeze_task - send a freeze request to given task
- * @p: task to send the request to
- * @sig_only: if set, the request will only be sent if the task has the
- * PF_FREEZER_NOSIG flag unset
- * Return value: 'false', if @sig_only is set and the task has
- * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
- *
- * The freeze request is sent by setting the tasks's TIF_FREEZE flag and
- * either sending a fake signal to it or waking it up, depending on whether
- * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
- * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
- * TIF_FREEZE flag will not be set.
- */
-static bool freeze_task(struct task_struct *p, bool sig_only)
-{
- /*
- * We first check if the task is freezing and next if it has already
- * been frozen to avoid the race with frozen_process() which first marks
- * the task as frozen and next clears its TIF_FREEZE.
- */
- if (!freezing(p)) {
- rmb();
- if (frozen(p))
- return false;
-
- if (!sig_only || should_send_signal(p))
- set_freeze_flag(p);
- else
- return false;
- }
-
- if (should_send_signal(p)) {
- if (!signal_pending(p))
- fake_signal_wake_up(p);
- } else if (sig_only) {
- return false;
- } else {
- wake_up_state(p, TASK_INTERRUPTIBLE);
- }
-
- return true;
-}
-
-static void cancel_freezing(struct task_struct *p)
-{
- unsigned long flags;
-
- if (freezing(p)) {
- pr_debug(" clean up: %s\n", p->comm);
- clear_freeze_flag(p);
- spin_lock_irqsave(&p->sighand->siglock, flags);
- recalc_sigpending_and_wake(p);
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
- }
-}
-
static int try_to_freeze_tasks(bool sig_only)
{
struct task_struct *g, *p;
@@ -250,6 +135,9 @@ static void thaw_tasks(bool nosig_only)
if (nosig_only && should_send_signal(p))
continue;
+ if (cgroup_frozen(p))
+ continue;
+
thaw_process(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
@@ -264,4 +152,3 @@ void thaw_processes(void)
printk("done.\n");
}
-EXPORT_SYMBOL(refrigerator);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 356699a96d5..1e68e4c39e2 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -45,7 +45,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
* TASK_TRACED, resume it now.
* Requires that irqs be disabled.
*/
-void ptrace_untrace(struct task_struct *child)
+static void ptrace_untrace(struct task_struct *child)
{
spin_lock(&child->sighand->siglock);
if (task_is_traced(child)) {
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index ca4bbbe04aa..59236e8b9da 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -54,9 +54,9 @@
#include <linux/cpu.h>
#include <linux/random.h>
#include <linux/delay.h>
-#include <linux/byteorder/swabb.h>
#include <linux/cpumask.h>
#include <linux/rcupreempt_trace.h>
+#include <asm/byteorder.h>
/*
* PREEMPT_RCU data structures.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 617d41e4d6a..b3cc73931d1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -833,6 +833,16 @@ static struct ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "scan_unevictable_pages",
+ .data = &scan_unevictable_pages,
+ .maxlen = sizeof(scan_unevictable_pages),
+ .mode = 0644,
+ .proc_handler = &scan_unevictable_handler,
+ },
+#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
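
Assuming the new entry surfaces under /proc/sys/vm (the usual home for these CTL_UNNUMBERED VM knobs), a one-off rescan of the unevictable lists could be requested like this sketch:

#include <stdio.h>

int main(void)
{
	/* path is an assumption; see scan_unevictable_handler above */
	FILE *f = fopen("/proc/sys/vm/scan_unevictable_pages", "w");

	if (!f)
		return 1;
	fputs("1", f);
	return fclose(f) ? 1 : 0;
}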
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06fb57c86de..482df94ea21 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -316,17 +316,6 @@ int bitmap_scnprintf(char *buf, unsigned int buflen,
EXPORT_SYMBOL(bitmap_scnprintf);
/**
- * bitmap_scnprintf_len - return buffer length needed to convert
- * bitmap to an ASCII hex string
- * @nr_bits: number of bits to be converted
- */
-int bitmap_scnprintf_len(unsigned int nr_bits)
-{
- unsigned int nr_nibbles = ALIGN(nr_bits, 4) / 4;
- return nr_nibbles + ALIGN(nr_nibbles, CHUNKSZ / 4) / (CHUNKSZ / 4) - 1;
-}
-
-/**
* __bitmap_parse - convert an ASCII hex string into a bitmap.
* @buf: pointer to buffer containing string.
* @buflen: buffer size in bytes. If string is smaller than this
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index cceecb6a963..a013bbc2371 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -24,6 +24,7 @@
#include <linux/kernel.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
+#include <linux/ioport.h>
#include <asm/page.h> /* for PAGE_SIZE */
#include <asm/div64.h>
@@ -550,18 +551,51 @@ static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int
#endif
}
+static char *resource_string(char *buf, char *end, struct resource *res, int field_width, int precision, int flags)
+{
+#ifndef IO_RSRC_PRINTK_SIZE
+#define IO_RSRC_PRINTK_SIZE 4
+#endif
+
+#ifndef MEM_RSRC_PRINTK_SIZE
+#define MEM_RSRC_PRINTK_SIZE 8
+#endif
+
+ /* room for the actual numbers, the two "0x", -, [, ] and the final zero */
+ char sym[4*sizeof(resource_size_t) + 8];
+ char *p = sym, *pend = sym + sizeof(sym);
+ int size = -1;
+
+ if (res->flags & IORESOURCE_IO)
+ size = IO_RSRC_PRINTK_SIZE;
+ else if (res->flags & IORESOURCE_MEM)
+ size = MEM_RSRC_PRINTK_SIZE;
+
+ *p++ = '[';
+ p = number(p, pend, res->start, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+ *p++ = '-';
+ p = number(p, pend, res->end, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+ *p++ = ']';
+ *p = 0;
+
+ return string(buf, end, sym, field_width, precision, flags);
+}
+
/*
* Show a '%p' thing. A kernel extension is that the '%p' is followed
* by an extra set of alphanumeric characters that are extended format
* specifiers.
*
- * Right now we just handle 'F' (for symbolic Function descriptor pointers)
- * and 'S' (for Symbolic direct pointers), but this can easily be
- * extended in the future (network address types etc).
+ * Right now we handle:
+ *
+ * - 'F' For symbolic function descriptor pointers
+ * - 'S' For symbolic direct pointers
+ * - 'R' For a struct resource pointer, it prints the range of
+ * addresses (neither the name nor the flags)
*
- * The difference between 'S' and 'F' is that on ia64 and ppc64 function
- * pointers are really function descriptors, which contain a pointer the
- * real address.
+ * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
+ * function pointers are really function descriptors, which contain a
+ * pointer to the real address.
*/
static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags)
{
@@ -571,6 +605,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
/* Fallthrough */
case 'S':
return symbol_string(buf, end, ptr, field_width, precision, flags);
+ case 'R':
+ return resource_string(buf, end, ptr, field_width, precision, flags);
}
flags |= SMALL;
if (field_width == -1) {
@@ -590,6 +626,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
* This function follows C99 vsnprintf, but has some extensions:
* %pS output the name of a text symbol
* %pF output the name of a function pointer
+ * %pR output the address range in a struct resource
*
* The return value is the number of characters which would
* be generated for the given input, excluding the trailing
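
A hypothetical caller of the new specifier: resource_string() above picks a 4-digit field width for IORESOURCE_IO ranges and an 8-digit one for IORESOURCE_MEM, and prints only the start/end addresses:

#include <linux/ioport.h>
#include <linux/kernel.h>

static void report_region(const struct resource *res)
{
	/* prints e.g. "claimed [0x00001000-0x0000107f]" for an I/O range */
	printk(KERN_INFO "claimed %pR\n", res);
}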
diff --git a/mm/Kconfig b/mm/Kconfig
index 1a501a4de95..5b5790f8a81 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -209,5 +209,16 @@ config VIRT_TO_BUS
def_bool y
depends on !ARCH_NO_VIRT_TO_BUS
+config UNEVICTABLE_LRU
+ bool "Add LRU list to track non-evictable pages"
+ default y
+ depends on MMU
+ help
+ Keeps unevictable pages off the active and inactive pageout
+ lists, so kswapd will not waste CPU time or have its balancing
+ algorithms thrown off by scanning these pages. Selecting this
+ will use one page flag and increase the code size a little;
+ say Y unless you know what you are doing.
+
config MMU_NOTIFIER
bool
diff --git a/mm/Makefile b/mm/Makefile
index da4ccf015ae..c06b45a1ff5 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -33,5 +33,4 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
-obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
-
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
diff --git a/mm/filemap.c b/mm/filemap.c
index 903bf316912..ab8553658af 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
#include <linux/cpuset.h>
#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
#include <linux/memcontrol.h>
+#include <linux/mm_inline.h> /* for page_is_file_cache() */
#include "internal.h"
/*
@@ -115,12 +116,12 @@ void __remove_from_page_cache(struct page *page)
{
struct address_space *mapping = page->mapping;
- mem_cgroup_uncharge_cache_page(page);
radix_tree_delete(&mapping->page_tree, page->index);
page->mapping = NULL;
mapping->nrpages--;
__dec_zone_page_state(page, NR_FILE_PAGES);
BUG_ON(page_mapped(page));
+ mem_cgroup_uncharge_cache_page(page);
/*
* Some filesystems seem to re-dirty the page even after
@@ -492,9 +493,24 @@ EXPORT_SYMBOL(add_to_page_cache_locked);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask)
{
- int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
- if (ret == 0)
- lru_cache_add(page);
+ int ret;
+
+ /*
+ * Splice_read and readahead add shmem/tmpfs pages into the page cache
+ * before shmem_readpage has a chance to mark them as SwapBacked: they
+ * need to go on the active_anon lru below, and mem_cgroup_cache_charge
+ * (called in add_to_page_cache) needs to know where they're going too.
+ */
+ if (mapping_cap_swap_backed(mapping))
+ SetPageSwapBacked(page);
+
+ ret = add_to_page_cache(page, mapping, offset, gfp_mask);
+ if (ret == 0) {
+ if (page_is_file_cache(page))
+ lru_cache_add_file(page);
+ else
+ lru_cache_add_active_anon(page);
+ }
return ret;
}
@@ -557,17 +573,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
* mechanism between PageLocked pages and PageWriteback pages is shared.
* But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
*
- * The first mb is necessary to safely close the critical section opened by the
- * test_and_set_bit() to lock the page; the second mb is necessary to enforce
- * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
- * races with a parallel wait_on_page_locked()).
+ * The mb is necessary to enforce ordering between the clear_bit and the read
+ * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()).
*/
void unlock_page(struct page *page)
{
- smp_mb__before_clear_bit();
- if (!test_and_clear_bit(PG_locked, &page->flags))
- BUG();
- smp_mb__after_clear_bit();
+ VM_BUG_ON(!PageLocked(page));
+ clear_bit_unlock(PG_locked, &page->flags);
+ smp_mb__after_clear_bit();
wake_up_page(page, PG_locked);
}
EXPORT_SYMBOL(unlock_page);
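
Spelling out the ordering the remaining barrier enforces (a reconstruction of the comment above, not code from the patch):

	CPU A: unlock_page()                 CPU B: wait_on_page_locked()
	----------------------------         ----------------------------
	clear_bit_unlock(PG_locked)          add self to page's waitqueue
	smp_mb__after_clear_bit()            (prepare_to_wait implies a barrier)
	wake_up_page: scan waitqueue         test PG_locked; sleep if still set

Without the smp_mb, CPU A's read of the waitqueue could be reordered before
its clear of PG_locked becomes visible: A would find the queue empty and skip
the wakeup, while B, still seeing the bit set, would sleep indefinitely.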
diff --git a/mm/fremap.c b/mm/fremap.c
index 7881638e4a1..7d12ca70ef7 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -21,6 +21,8 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#include "internal.h"
+
static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
@@ -215,15 +217,31 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
spin_unlock(&mapping->i_mmap_lock);
}
+ if (vma->vm_flags & VM_LOCKED) {
+ /*
+ * drop the PG_mlocked flag for the over-mapped range
+ */
+ unsigned int saved_flags = vma->vm_flags;
+ munlock_vma_pages_range(vma, start, start + size);
+ vma->vm_flags = saved_flags;
+ }
+
mmu_notifier_invalidate_range_start(mm, start, start + size);
err = populate_range(mm, vma, start, size, pgoff);
mmu_notifier_invalidate_range_end(mm, start, start + size);
if (!err && !(flags & MAP_NONBLOCK)) {
- if (unlikely(has_write_lock)) {
- downgrade_write(&mm->mmap_sem);
- has_write_lock = 0;
+ if (vma->vm_flags & VM_LOCKED) {
+ /*
+ * might be mapping a previously unmapped range of the file
+ */
+ mlock_vma_pages_range(vma, start, start + size);
+ } else {
+ if (unlikely(has_write_lock)) {
+ downgrade_write(&mm->mmap_sem);
+ has_write_lock = 0;
+ }
+ make_pages_present(start, start+size);
}
- make_pages_present(start, start+size);
}
/*
@@ -240,4 +258,3 @@ out:
return err;
}
-
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38633864a93..ce8cbb29860 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -262,7 +262,7 @@ struct resv_map {
struct list_head regions;
};
-struct resv_map *resv_map_alloc(void)
+static struct resv_map *resv_map_alloc(void)
{
struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
if (!resv_map)
@@ -274,7 +274,7 @@ struct resv_map *resv_map_alloc(void)
return resv_map;
}
-void resv_map_release(struct kref *ref)
+static void resv_map_release(struct kref *ref)
{
struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
@@ -289,7 +289,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return (struct resv_map *)(get_vma_private_data(vma) &
~HPAGE_RESV_MASK);
- return 0;
+ return NULL;
}
static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
@@ -1459,11 +1459,11 @@ int hugetlb_report_meminfo(char *buf)
{
struct hstate *h = &default_hstate;
return sprintf(buf,
- "HugePages_Total: %5lu\n"
- "HugePages_Free: %5lu\n"
- "HugePages_Rsvd: %5lu\n"
- "HugePages_Surp: %5lu\n"
- "Hugepagesize: %5lu kB\n",
+ "HugePages_Total: %5lu\n"
+ "HugePages_Free: %5lu\n"
+ "HugePages_Rsvd: %5lu\n"
+ "HugePages_Surp: %5lu\n"
+ "Hugepagesize: %8lu kB\n",
h->nr_huge_pages,
h->free_huge_pages,
h->resv_huge_pages,
@@ -1747,10 +1747,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
* from other VMAs and let the children be SIGKILLed if they are faulting the
* same region.
*/
-int unmap_ref_private(struct mm_struct *mm,
- struct vm_area_struct *vma,
- struct page *page,
- unsigned long address)
+static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct page *page, unsigned long address)
{
struct vm_area_struct *iter_vma;
struct address_space *mapping;
@@ -2073,6 +2071,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
return NULL;
}
+static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
+{
+ if (!ptep || write || shared)
+ return 0;
+ else
+ return huge_pte_none(huge_ptep_get(ptep));
+}
+
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, int *length, int i,
@@ -2082,6 +2088,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long vaddr = *position;
int remainder = *length;
struct hstate *h = hstate_vma(vma);
+ int zeropage_ok = 0;
+ int shared = vma->vm_flags & VM_SHARED;
spin_lock(&mm->page_table_lock);
while (vaddr < vma->vm_end && remainder) {
@@ -2094,8 +2102,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
* first, for the page indexing below to work.
*/
pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+ if (huge_zeropage_ok(pte, write, shared))
+ zeropage_ok = 1;
- if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
+ if (!pte ||
+ (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
(write && !pte_write(huge_ptep_get(pte)))) {
int ret;
@@ -2115,8 +2126,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
page = pte_page(huge_ptep_get(pte));
same_page:
if (pages) {
- get_page(page);
- pages[i] = page + pfn_offset;
+ if (zeropage_ok)
+ pages[i] = ZERO_PAGE(0);
+ else
+ pages[i] = page + pfn_offset;
+ get_page(pages[i]);
}
if (vmas)
diff --git a/mm/internal.h b/mm/internal.h
index 1f43f741697..e4e728bdf32 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -39,6 +39,15 @@ static inline void __put_page(struct page *page)
atomic_dec(&page->_count);
}
+/*
+ * in mm/vmscan.c:
+ */
+extern int isolate_lru_page(struct page *page);
+extern void putback_lru_page(struct page *page);
+
+/*
+ * in mm/page_alloc.c
+ */
extern void __free_pages_bootmem(struct page *page, unsigned int order);
/*
@@ -52,6 +61,120 @@ static inline unsigned long page_order(struct page *page)
return page_private(page);
}
+extern long mlock_vma_pages_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
+extern void munlock_vma_pages_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
+static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
+{
+ munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
+}
+
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * unevictable_migrate_page() called only from migrate_page_copy() to
+ * migrate unevictable flag to new page.
+ * Note that the old page has been isolated from the LRU lists at this
+ * point so we don't need to worry about LRU statistics.
+ */
+static inline void unevictable_migrate_page(struct page *new, struct page *old)
+{
+ if (TestClearPageUnevictable(old))
+ SetPageUnevictable(new);
+}
+#else
+static inline void unevictable_migrate_page(struct page *new, struct page *old)
+{
+}
+#endif
+
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * Called only in fault path via page_evictable() for a new page
+ * to determine if it's being mapped into a LOCKED vma.
+ * If so, mark page as mlocked.
+ */
+static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page)
+{
+ VM_BUG_ON(PageLRU(page));
+
+ if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
+ return 0;
+
+ if (!TestSetPageMlocked(page)) {
+ inc_zone_page_state(page, NR_MLOCK);
+ count_vm_event(UNEVICTABLE_PGMLOCKED);
+ }
+ return 1;
+}
+
+/*
+ * must be called with vma's mmap_sem held for read, and page locked.
+ */
+extern void mlock_vma_page(struct page *page);
+
+/*
+ * Clear the page's PageMlocked(). This can be useful in a situation where
+ * we want to unconditionally remove a page from the pagecache -- e.g.,
+ * on truncation or freeing.
+ *
+ * It is legal to call this function for any page, mlocked or not.
+ * If called for a page that is still mapped by mlocked vmas, all we do
+ * is revert to lazy LRU behaviour -- semantics are not broken.
+ */
+extern void __clear_page_mlock(struct page *page);
+static inline void clear_page_mlock(struct page *page)
+{
+ if (unlikely(TestClearPageMlocked(page)))
+ __clear_page_mlock(page);
+}
+
+/*
+ * mlock_migrate_page - called only from migrate_page_copy() to
+ * migrate the Mlocked page flag; update statistics.
+ */
+static inline void mlock_migrate_page(struct page *newpage, struct page *page)
+{
+ if (TestClearPageMlocked(page)) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __dec_zone_page_state(page, NR_MLOCK);
+ SetPageMlocked(newpage);
+ __inc_zone_page_state(newpage, NR_MLOCK);
+ local_irq_restore(flags);
+ }
+}
+
+/*
+ * free_page_mlock() -- clean up attempts to free an mlocked page.
+ * Page should not be on lru, so no need to fix that up.
+ * free_pages_check() will verify...
+ */
+static inline void free_page_mlock(struct page *page)
+{
+ if (unlikely(TestClearPageMlocked(page))) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __dec_zone_page_state(page, NR_MLOCK);
+ __count_vm_event(UNEVICTABLE_MLOCKFREED);
+ local_irq_restore(flags);
+ }
+}
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
+{
+ return 0;
+}
+static inline void clear_page_mlock(struct page *page) { }
+static inline void mlock_vma_page(struct page *page) { }
+static inline void mlock_migrate_page(struct page *new, struct page *old) { }
+static inline void free_page_mlock(struct page *page) { }
+
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
/*
* FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
* so all functions starting at paging_init should be marked __init
@@ -120,4 +243,12 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
}
#endif /* CONFIG_SPARSEMEM */
+#define GUP_FLAGS_WRITE 0x1
+#define GUP_FLAGS_FORCE 0x2
+#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
+
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int len, int flags,
+ struct page **pages, struct vm_area_struct **vmas);
+
#endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 36896f3eb7f..d4a92b63e98 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -32,11 +32,12 @@
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
+#include <linux/mm_inline.h>
+#include <linux/page_cgroup.h>
#include <asm/uaccess.h>
struct cgroup_subsys mem_cgroup_subsys __read_mostly;
-static struct kmem_cache *page_cgroup_cache __read_mostly;
#define MEM_CGROUP_RECLAIM_RETRIES 5
/*
@@ -65,11 +66,10 @@ struct mem_cgroup_stat {
/*
* For accounting under irq disable, no need for increment preempt count.
*/
-static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat,
+static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat,
enum mem_cgroup_stat_index idx, int val)
{
- int cpu = smp_processor_id();
- stat->cpustat[cpu].count[idx] += val;
+ stat->count[idx] += val;
}
static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
@@ -85,22 +85,13 @@ static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
/*
* per-zone information in memory controller.
*/
-
-enum mem_cgroup_zstat_index {
- MEM_CGROUP_ZSTAT_ACTIVE,
- MEM_CGROUP_ZSTAT_INACTIVE,
-
- NR_MEM_CGROUP_ZSTAT,
-};
-
struct mem_cgroup_per_zone {
/*
* spin_lock to protect the per cgroup LRU
*/
spinlock_t lru_lock;
- struct list_head active_list;
- struct list_head inactive_list;
- unsigned long count[NR_MEM_CGROUP_ZSTAT];
+ struct list_head lists[NR_LRU_LISTS];
+ unsigned long count[NR_LRU_LISTS];
};
/* Macro for accessing counter */
#define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)])
@@ -144,69 +135,52 @@ struct mem_cgroup {
};
static struct mem_cgroup init_mem_cgroup;
-/*
- * We use the lower bit of the page->page_cgroup pointer as a bit spin
- * lock. We need to ensure that page->page_cgroup is at least two
- * byte aligned (based on comments from Nick Piggin). But since
- * bit_spin_lock doesn't actually set that lock bit in a non-debug
- * uniprocessor kernel, we should avoid setting it here too.
- */
-#define PAGE_CGROUP_LOCK_BIT 0x0
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT)
-#else
-#define PAGE_CGROUP_LOCK 0x0
-#endif
-
-/*
- * A page_cgroup page is associated with every page descriptor. The
- * page_cgroup helps us identify information about the cgroup
- */
-struct page_cgroup {
- struct list_head lru; /* per cgroup LRU list */
- struct page *page;
- struct mem_cgroup *mem_cgroup;
- int flags;
-};
-#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
-#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */
-
-static int page_cgroup_nid(struct page_cgroup *pc)
-{
- return page_to_nid(pc->page);
-}
-
-static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
-{
- return page_zonenum(pc->page);
-}
-
enum charge_type {
MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
MEM_CGROUP_CHARGE_TYPE_MAPPED,
+ MEM_CGROUP_CHARGE_TYPE_SHMEM, /* used by page migration of shmem */
MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */
+ NR_CHARGE_TYPE,
+};
+
+/* only used here, for easy reading */
+#define PCGF_CACHE (1UL << PCG_CACHE)
+#define PCGF_USED (1UL << PCG_USED)
+#define PCGF_ACTIVE (1UL << PCG_ACTIVE)
+#define PCGF_LOCK (1UL << PCG_LOCK)
+#define PCGF_FILE (1UL << PCG_FILE)
+static const unsigned long
+pcg_default_flags[NR_CHARGE_TYPE] = {
+ PCGF_CACHE | PCGF_FILE | PCGF_USED | PCGF_LOCK, /* File Cache */
+ PCGF_ACTIVE | PCGF_USED | PCGF_LOCK, /* Anon */
+ PCGF_ACTIVE | PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
+ 0, /* FORCE */
};
/*
* Always modified under lru lock. Then, not necessary to preempt_disable()
*/
-static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
- bool charge)
+static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
+ struct page_cgroup *pc,
+ bool charge)
{
int val = (charge)? 1 : -1;
struct mem_cgroup_stat *stat = &mem->stat;
+ struct mem_cgroup_stat_cpu *cpustat;
VM_BUG_ON(!irqs_disabled());
- if (flags & PAGE_CGROUP_FLAG_CACHE)
- __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val);
+
+ cpustat = &stat->cpustat[smp_processor_id()];
+ if (PageCgroupCache(pc))
+ __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
else
- __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
+ __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
if (charge)
- __mem_cgroup_stat_add_safe(stat,
+ __mem_cgroup_stat_add_safe(cpustat,
MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
else
- __mem_cgroup_stat_add_safe(stat,
+ __mem_cgroup_stat_add_safe(cpustat,
MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
}
@@ -227,7 +201,7 @@ page_cgroup_zoneinfo(struct page_cgroup *pc)
}
static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem,
- enum mem_cgroup_zstat_index idx)
+ enum lru_list idx)
{
int nid, zid;
struct mem_cgroup_per_zone *mz;
@@ -262,85 +236,77 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
struct mem_cgroup, css);
}
-static inline int page_cgroup_locked(struct page *page)
-{
- return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
-{
- VM_BUG_ON(!page_cgroup_locked(page));
- page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
-}
-
-struct page_cgroup *page_get_page_cgroup(struct page *page)
-{
- return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK);
-}
-
-static void lock_page_cgroup(struct page *page)
-{
- bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static int try_lock_page_cgroup(struct page *page)
-{
- return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void unlock_page_cgroup(struct page *page)
-{
- bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
struct page_cgroup *pc)
{
- int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+ int lru = LRU_BASE;
+
+ if (PageCgroupUnevictable(pc))
+ lru = LRU_UNEVICTABLE;
+ else {
+ if (PageCgroupActive(pc))
+ lru += LRU_ACTIVE;
+ if (PageCgroupFile(pc))
+ lru += LRU_FILE;
+ }
- if (from)
- MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
- else
- MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+ MEM_CGROUP_ZSTAT(mz, lru) -= 1;
- mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
+ mem_cgroup_charge_statistics(pc->mem_cgroup, pc, false);
list_del(&pc->lru);
}
static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
struct page_cgroup *pc)
{
- int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-
- if (!to) {
- MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
- list_add(&pc->lru, &mz->inactive_list);
- } else {
- MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
- list_add(&pc->lru, &mz->active_list);
+ int lru = LRU_BASE;
+
+ if (PageCgroupUnevictable(pc))
+ lru = LRU_UNEVICTABLE;
+ else {
+ if (PageCgroupActive(pc))
+ lru += LRU_ACTIVE;
+ if (PageCgroupFile(pc))
+ lru += LRU_FILE;
}
- mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
+
+ MEM_CGROUP_ZSTAT(mz, lru) += 1;
+ list_add(&pc->lru, &mz->lists[lru]);
+
+ mem_cgroup_charge_statistics(pc->mem_cgroup, pc, true);
}
-static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
+static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
{
- int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
+ int active = PageCgroupActive(pc);
+ int file = PageCgroupFile(pc);
+ int unevictable = PageCgroupUnevictable(pc);
+ enum lru_list from = unevictable ? LRU_UNEVICTABLE :
+ (LRU_FILE * !!file + !!active);
- if (from)
- MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
- else
- MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+ if (lru == from)
+ return;
- if (active) {
- MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
- pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
- list_move(&pc->lru, &mz->active_list);
+ MEM_CGROUP_ZSTAT(mz, from) -= 1;
+ /*
+ * However, while this is done under mz->lru_lock, other flags,
+ * which are not related to the LRU, may be modified outside the
+ * lock. We have to use atomic set/clear operations on them.
+ */
+ if (is_unevictable_lru(lru)) {
+ ClearPageCgroupActive(pc);
+ SetPageCgroupUnevictable(pc);
} else {
- MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
- pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
- list_move(&pc->lru, &mz->inactive_list);
+ if (is_active_lru(lru))
+ SetPageCgroupActive(pc);
+ else
+ ClearPageCgroupActive(pc);
+ ClearPageCgroupUnevictable(pc);
}
+
+ MEM_CGROUP_ZSTAT(mz, lru) += 1;
+ list_move(&pc->lru, &mz->lists[lru]);
}
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
@@ -356,7 +322,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
/*
* This routine assumes that the appropriate zone's lru lock is already held
*/
-void mem_cgroup_move_lists(struct page *page, bool active)
+void mem_cgroup_move_lists(struct page *page, enum lru_list lru)
{
struct page_cgroup *pc;
struct mem_cgroup_per_zone *mz;
@@ -372,17 +338,16 @@ void mem_cgroup_move_lists(struct page *page, bool active)
* safely get to page_cgroup without it, so just try_lock it:
* mem_cgroup_isolate_pages allows for page left on wrong list.
*/
- if (!try_lock_page_cgroup(page))
+ pc = lookup_page_cgroup(page);
+ if (!trylock_page_cgroup(pc))
return;
-
- pc = page_get_page_cgroup(page);
- if (pc) {
+ if (pc && PageCgroupUsed(pc)) {
mz = page_cgroup_zoneinfo(pc);
spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_move_lists(pc, active);
+ __mem_cgroup_move_lists(pc, lru);
spin_unlock_irqrestore(&mz->lru_lock, flags);
}
- unlock_page_cgroup(page);
+ unlock_page_cgroup(pc);
}
/*
@@ -403,21 +368,6 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
}
/*
- * This function is called from vmscan.c. In page reclaiming loop. balance
- * between active and inactive list is calculated. For memory controller
- * page reclaiming, we should use using mem_cgroup's imbalance rather than
- * zone's global lru imbalance.
- */
-long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
-{
- unsigned long active, inactive;
- /* active and inactive are the number of pages. 'long' is ok.*/
- active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE);
- inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE);
- return (long) (active / (inactive + 1));
-}
-
-/*
* prev_priority control...this will be used in memory reclaim path.
*/
int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
@@ -444,28 +394,17 @@ void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
* (see include/linux/mmzone.h)
*/
-long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
- struct zone *zone, int priority)
+long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
+ int priority, enum lru_list lru)
{
- long nr_active;
+ long nr_pages;
int nid = zone->zone_pgdat->node_id;
int zid = zone_idx(zone);
struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
- nr_active = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE);
- return (nr_active >> priority);
-}
+ nr_pages = MEM_CGROUP_ZSTAT(mz, lru);
-long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
- struct zone *zone, int priority)
-{
- long nr_inactive;
- int nid = zone->zone_pgdat->node_id;
- int zid = zone_idx(zone);
- struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
-
- nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE);
- return (nr_inactive >> priority);
+ return (nr_pages >> priority);
}
unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -473,7 +412,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
unsigned long *scanned, int order,
int mode, struct zone *z,
struct mem_cgroup *mem_cont,
- int active)
+ int active, int file)
{
unsigned long nr_taken = 0;
struct page *page;
@@ -484,38 +423,38 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
int nid = z->zone_pgdat->node_id;
int zid = zone_idx(z);
struct mem_cgroup_per_zone *mz;
+ int lru = LRU_FILE * !!file + !!active;
BUG_ON(!mem_cont);
mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
- if (active)
- src = &mz->active_list;
- else
- src = &mz->inactive_list;
-
+ src = &mz->lists[lru];
spin_lock(&mz->lru_lock);
scan = 0;
list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
if (scan >= nr_to_scan)
break;
+ if (unlikely(!PageCgroupUsed(pc)))
+ continue;
page = pc->page;
if (unlikely(!PageLRU(page)))
continue;
- if (PageActive(page) && !active) {
- __mem_cgroup_move_lists(pc, true);
- continue;
- }
- if (!PageActive(page) && active) {
- __mem_cgroup_move_lists(pc, false);
+ /*
+ * TODO: play better with lumpy reclaim, grabbing anything.
+ */
+ if (PageUnevictable(page) ||
+ (PageActive(page) && !active) ||
+ (!PageActive(page) && active)) {
+ __mem_cgroup_move_lists(pc, page_lru(page));
continue;
}
scan++;
list_move(&pc->lru, &pc_list);
- if (__isolate_lru_page(page, mode) == 0) {
+ if (__isolate_lru_page(page, mode, file) == 0) {
list_move(&page->lru, dst);
nr_taken++;
}
@@ -540,26 +479,27 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
{
struct mem_cgroup *mem;
struct page_cgroup *pc;
- unsigned long flags;
unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup_per_zone *mz;
+ unsigned long flags;
- pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
- if (unlikely(pc == NULL))
- goto err;
-
+ pc = lookup_page_cgroup(page);
+ /* can happen at boot */
+ if (unlikely(!pc))
+ return 0;
+ prefetchw(pc);
/*
* We always charge the cgroup the mm_struct belongs to.
* The mm_struct's mem_cgroup changes on task migration if the
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
+
if (likely(!memcg)) {
rcu_read_lock();
mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
if (unlikely(!mem)) {
rcu_read_unlock();
- kmem_cache_free(page_cgroup_cache, pc);
return 0;
}
/*
@@ -572,7 +512,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
css_get(&memcg->css);
}
- while (res_counter_charge(&mem->res, PAGE_SIZE)) {
+ while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
if (!(gfp_mask & __GFP_WAIT))
goto out;
@@ -595,39 +535,33 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
}
}
- pc->mem_cgroup = mem;
- pc->page = page;
- /*
- * If a page is accounted as a page cache, insert to inactive list.
- * If anon, insert to active list.
- */
- if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
- pc->flags = PAGE_CGROUP_FLAG_CACHE;
- else
- pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
- lock_page_cgroup(page);
- if (unlikely(page_get_page_cgroup(page))) {
- unlock_page_cgroup(page);
+ lock_page_cgroup(pc);
+ if (unlikely(PageCgroupUsed(pc))) {
+ unlock_page_cgroup(pc);
res_counter_uncharge(&mem->res, PAGE_SIZE);
css_put(&mem->css);
- kmem_cache_free(page_cgroup_cache, pc);
+
goto done;
}
- page_assign_page_cgroup(page, pc);
+ pc->mem_cgroup = mem;
+ /*
+ * If a page is accounted as a page cache, insert to inactive list.
+ * If anon, insert to active list.
+ */
+ pc->flags = pcg_default_flags[ctype];
mz = page_cgroup_zoneinfo(pc);
+
spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_add_list(mz, pc);
spin_unlock_irqrestore(&mz->lru_lock, flags);
+ unlock_page_cgroup(pc);
- unlock_page_cgroup(page);
done:
return 0;
out:
css_put(&mem->css);
- kmem_cache_free(page_cgroup_cache, pc);
-err:
return -ENOMEM;
}
@@ -635,7 +569,8 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
{
if (mem_cgroup_subsys.disabled)
return 0;
-
+ if (PageCompound(page))
+ return 0;
/*
* If already mapped, we don't have to account.
* If page cache, page->mapping has address_space.
@@ -656,7 +591,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
{
if (mem_cgroup_subsys.disabled)
return 0;
-
+ if (PageCompound(page))
+ return 0;
/*
* Corner case handling. This is called from add_to_page_cache()
* in usual. But some FS (shmem) precharges this page before calling it
@@ -669,22 +605,27 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
if (!(gfp_mask & __GFP_WAIT)) {
struct page_cgroup *pc;
- lock_page_cgroup(page);
- pc = page_get_page_cgroup(page);
- if (pc) {
- VM_BUG_ON(pc->page != page);
- VM_BUG_ON(!pc->mem_cgroup);
- unlock_page_cgroup(page);
+
+ pc = lookup_page_cgroup(page);
+ if (!pc)
+ return 0;
+ lock_page_cgroup(pc);
+ if (PageCgroupUsed(pc)) {
+ unlock_page_cgroup(pc);
return 0;
}
- unlock_page_cgroup(page);
+ unlock_page_cgroup(pc);
}
if (unlikely(!mm))
mm = &init_mm;
- return mem_cgroup_charge_common(page, mm, gfp_mask,
+ if (page_is_file_cache(page))
+ return mem_cgroup_charge_common(page, mm, gfp_mask,
MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+ else
+ return mem_cgroup_charge_common(page, mm, gfp_mask,
+ MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
}
/*
@@ -704,44 +645,46 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
/*
* Check if our page_cgroup is valid
*/
- lock_page_cgroup(page);
- pc = page_get_page_cgroup(page);
- if (unlikely(!pc))
- goto unlock;
-
- VM_BUG_ON(pc->page != page);
+ pc = lookup_page_cgroup(page);
+ if (unlikely(!pc || !PageCgroupUsed(pc)))
+ return;
- if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
- && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
- || page_mapped(page)))
- goto unlock;
+ lock_page_cgroup(pc);
+ if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED && page_mapped(page))
+ || !PageCgroupUsed(pc)) {
+ /* This happens in a race with zap_pte_range() and do_swap_page() */
+ unlock_page_cgroup(pc);
+ return;
+ }
+ ClearPageCgroupUsed(pc);
+ mem = pc->mem_cgroup;
mz = page_cgroup_zoneinfo(pc);
spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_remove_list(mz, pc);
spin_unlock_irqrestore(&mz->lru_lock, flags);
+ unlock_page_cgroup(pc);
- page_assign_page_cgroup(page, NULL);
- unlock_page_cgroup(page);
-
- mem = pc->mem_cgroup;
res_counter_uncharge(&mem->res, PAGE_SIZE);
css_put(&mem->css);
- kmem_cache_free(page_cgroup_cache, pc);
return;
-unlock:
- unlock_page_cgroup(page);
}
void mem_cgroup_uncharge_page(struct page *page)
{
+ /* early check. */
+ if (page_mapped(page))
+ return;
+ if (page->mapping && !PageAnon(page))
+ return;
__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
void mem_cgroup_uncharge_cache_page(struct page *page)
{
VM_BUG_ON(page_mapped(page));
+ VM_BUG_ON(page->mapping);
__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
}
@@ -758,15 +701,19 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
if (mem_cgroup_subsys.disabled)
return 0;
- lock_page_cgroup(page);
- pc = page_get_page_cgroup(page);
- if (pc) {
+ pc = lookup_page_cgroup(page);
+ lock_page_cgroup(pc);
+ if (PageCgroupUsed(pc)) {
mem = pc->mem_cgroup;
css_get(&mem->css);
- if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
- ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+ if (PageCgroupCache(pc)) {
+ if (page_is_file_cache(page))
+ ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+ else
+ ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+ }
}
- unlock_page_cgroup(page);
+ unlock_page_cgroup(pc);
if (mem) {
ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
ctype, mem);
@@ -791,7 +738,7 @@ void mem_cgroup_end_migration(struct page *newpage)
*/
if (!newpage->mapping)
__mem_cgroup_uncharge_common(newpage,
- MEM_CGROUP_CHARGE_TYPE_FORCE);
+ MEM_CGROUP_CHARGE_TYPE_FORCE);
else if (PageAnon(newpage))
mem_cgroup_uncharge_page(newpage);
}
@@ -863,7 +810,7 @@ int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
#define FORCE_UNCHARGE_BATCH (128)
static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
struct mem_cgroup_per_zone *mz,
- int active)
+ enum lru_list lru)
{
struct page_cgroup *pc;
struct page *page;
@@ -871,15 +818,14 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
unsigned long flags;
struct list_head *list;
- if (active)
- list = &mz->active_list;
- else
- list = &mz->inactive_list;
+ list = &mz->lists[lru];
spin_lock_irqsave(&mz->lru_lock, flags);
while (!list_empty(list)) {
pc = list_entry(list->prev, struct page_cgroup, lru);
page = pc->page;
+ if (!PageCgroupUsed(pc))
+ break;
get_page(page);
spin_unlock_irqrestore(&mz->lru_lock, flags);
/*
@@ -894,8 +840,10 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
count = FORCE_UNCHARGE_BATCH;
cond_resched();
}
- } else
- cond_resched();
+ } else {
+ spin_lock_irqsave(&mz->lru_lock, flags);
+ break;
+ }
spin_lock_irqsave(&mz->lru_lock, flags);
}
spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -919,15 +867,17 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
while (mem->res.usage > 0) {
if (atomic_read(&mem->css.cgroup->count) > 0)
goto out;
+ /* This makes sure all *used* pages are on an LRU list. */
+ lru_add_drain_all();
for_each_node_state(node, N_POSSIBLE)
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
struct mem_cgroup_per_zone *mz;
+ enum lru_list l;
mz = mem_cgroup_zoneinfo(mem, node, zid);
- /* drop all page_cgroup in active_list */
- mem_cgroup_force_empty_list(mem, mz, 1);
- /* drop all page_cgroup in inactive_list */
- mem_cgroup_force_empty_list(mem, mz, 0);
+ for_each_lru(l)
+ mem_cgroup_force_empty_list(mem, mz, l);
}
+ cond_resched();
}
ret = 0;
out:
@@ -1012,14 +962,27 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
}
/* showing # of active pages */
{
- unsigned long active, inactive;
-
- inactive = mem_cgroup_get_all_zonestat(mem_cont,
- MEM_CGROUP_ZSTAT_INACTIVE);
- active = mem_cgroup_get_all_zonestat(mem_cont,
- MEM_CGROUP_ZSTAT_ACTIVE);
- cb->fill(cb, "active", (active) * PAGE_SIZE);
- cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
+ unsigned long active_anon, inactive_anon;
+ unsigned long active_file, inactive_file;
+ unsigned long unevictable;
+
+ inactive_anon = mem_cgroup_get_all_zonestat(mem_cont,
+ LRU_INACTIVE_ANON);
+ active_anon = mem_cgroup_get_all_zonestat(mem_cont,
+ LRU_ACTIVE_ANON);
+ inactive_file = mem_cgroup_get_all_zonestat(mem_cont,
+ LRU_INACTIVE_FILE);
+ active_file = mem_cgroup_get_all_zonestat(mem_cont,
+ LRU_ACTIVE_FILE);
+ unevictable = mem_cgroup_get_all_zonestat(mem_cont,
+ LRU_UNEVICTABLE);
+
+ cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE);
+ cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE);
+ cb->fill(cb, "active_file", (active_file) * PAGE_SIZE);
+ cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE);
+ cb->fill(cb, "unevictable", unevictable * PAGE_SIZE);
+
}
return 0;
}
@@ -1062,6 +1025,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
{
struct mem_cgroup_per_node *pn;
struct mem_cgroup_per_zone *mz;
+ enum lru_list l;
int zone, tmp = node;
/*
* This routine is called against possible nodes.
@@ -1082,9 +1046,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
mz = &pn->zoneinfo[zone];
- INIT_LIST_HEAD(&mz->active_list);
- INIT_LIST_HEAD(&mz->inactive_list);
spin_lock_init(&mz->lru_lock);
+ for_each_lru(l)
+ INIT_LIST_HEAD(&mz->lists[l]);
}
return 0;
}
@@ -1124,8 +1088,8 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
int node;
if (unlikely((cont->parent) == NULL)) {
+ page_cgroup_init();
mem = &init_mem_cgroup;
- page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC);
} else {
mem = mem_cgroup_alloc();
if (!mem)
diff --git a/mm/memory.c b/mm/memory.c
index 1002f473f49..3a6c4a65832 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1129,12 +1131,17 @@ static inline int use_zero_page(struct vm_area_struct *vma)
return !vma->vm_ops || !vma->vm_ops->fault;
}
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long start, int len, int write, int force,
+
+
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int len, int flags,
struct page **pages, struct vm_area_struct **vmas)
{
int i;
- unsigned int vm_flags;
+ unsigned int vm_flags = 0;
+ int write = !!(flags & GUP_FLAGS_WRITE);
+ int force = !!(flags & GUP_FLAGS_FORCE);
+ int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
if (len <= 0)
return 0;
@@ -1158,7 +1165,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- if (write) /* user gate pages are read-only */
+
+ /* user gate pages are read-only */
+ if (!ignore && write)
return i ? : -EFAULT;
if (pg > TASK_SIZE)
pgd = pgd_offset_k(pg);
@@ -1190,8 +1199,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
continue;
}
- if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
- || !(vm_flags & vma->vm_flags))
+ if (!vma ||
+ (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
+ (!ignore && !(vm_flags & vma->vm_flags)))
return i ? : -EFAULT;
if (is_vm_hugetlb_page(vma)) {
@@ -1266,6 +1276,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
} while (len);
return i;
}
+
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int len, int write, int force,
+ struct page **pages, struct vm_area_struct **vmas)
+{
+ int flags = 0;
+
+ if (write)
+ flags |= GUP_FLAGS_WRITE;
+ if (force)
+ flags |= GUP_FLAGS_FORCE;
+
+ return __get_user_pages(tsk, mm,
+ start, len, flags,
+ pages, vmas);
+}
+
EXPORT_SYMBOL(get_user_pages);
pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@ -1296,18 +1323,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
pte_t *pte;
spinlock_t *ptl;
- retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
- if (retval)
- goto out;
-
retval = -EINVAL;
if (PageAnon(page))
- goto out_uncharge;
+ goto out;
retval = -ENOMEM;
flush_dcache_page(page);
pte = get_locked_pte(mm, addr, &ptl);
if (!pte)
- goto out_uncharge;
+ goto out;
retval = -EBUSY;
if (!pte_none(*pte))
goto out_unlock;
@@ -1323,8 +1346,6 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
return retval;
out_unlock:
pte_unmap_unlock(pte, ptl);
-out_uncharge:
- mem_cgroup_uncharge_page(page);
out:
return retval;
}
@@ -1858,6 +1879,15 @@ gotten:
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
if (!new_page)
goto oom;
+ /*
+ * Don't let another task, with possibly unlocked vma,
+ * keep the mlocked page.
+ */
+ if (vma->vm_flags & VM_LOCKED) {
+ lock_page(old_page); /* for LRU manipulation */
+ clear_page_mlock(old_page);
+ unlock_page(old_page);
+ }
cow_user_page(new_page, old_page, address, vma);
__SetPageUptodate(new_page);
@@ -1886,11 +1916,13 @@ gotten:
* thread doing COW.
*/
ptep_clear_flush_notify(vma, address, page_table);
- set_pte_at(mm, address, page_table, entry);
- update_mmu_cache(vma, address, entry);
- lru_cache_add_active(new_page);
+ SetPageSwapBacked(new_page);
+ lru_cache_add_active_or_unevictable(new_page, vma);
page_add_new_anon_rmap(new_page, vma, address);
+ /* TODO: is this safe? do_anonymous_page() does it this way. */
+ set_pte_at(mm, address, page_table, entry);
+ update_mmu_cache(vma, address, entry);
if (old_page) {
/*
* Only after switching the pte to the new page may
@@ -2288,16 +2320,17 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(PGMAJFAULT);
}
+ mark_page_accessed(page);
+
+ lock_page(page);
+ delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+
if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
- delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
ret = VM_FAULT_OOM;
+ unlock_page(page);
goto out;
}
- mark_page_accessed(page);
- lock_page(page);
- delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
-
/*
* Back out if somebody else already faulted in this pte.
*/
@@ -2324,7 +2357,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
page_add_anon_rmap(page, vma, address);
swap_free(entry);
- if (vm_swap_full())
+ if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
remove_exclusive_swap_page(page);
unlock_page(page);
@@ -2382,7 +2415,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (!pte_none(*page_table))
goto release;
inc_mm_counter(mm, anon_rss);
- lru_cache_add_active(page);
+ SetPageSwapBacked(page);
+ lru_cache_add_active_or_unevictable(page, vma);
page_add_new_anon_rmap(page, vma, address);
set_pte_at(mm, address, page_table, entry);
@@ -2423,6 +2457,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page;
pte_t entry;
int anon = 0;
+ int charged = 0;
struct page *dirty_page = NULL;
struct vm_fault vmf;
int ret;
@@ -2463,6 +2498,18 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
ret = VM_FAULT_OOM;
goto out;
}
+ if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+ ret = VM_FAULT_OOM;
+ page_cache_release(page);
+ goto out;
+ }
+ charged = 1;
+ /*
+ * Don't let another task, with possibly unlocked vma,
+ * keep the mlocked page.
+ */
+ if (vma->vm_flags & VM_LOCKED)
+ clear_page_mlock(vmf.page);
copy_user_highpage(page, vmf.page, address, vma);
__SetPageUptodate(page);
} else {
@@ -2497,11 +2544,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
- if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
- ret = VM_FAULT_OOM;
- goto out;
- }
-
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
/*
@@ -2520,11 +2562,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
entry = mk_pte(page, vma->vm_page_prot);
if (flags & FAULT_FLAG_WRITE)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- set_pte_at(mm, address, page_table, entry);
if (anon) {
- inc_mm_counter(mm, anon_rss);
- lru_cache_add_active(page);
- page_add_new_anon_rmap(page, vma, address);
+ inc_mm_counter(mm, anon_rss);
+ SetPageSwapBacked(page);
+ lru_cache_add_active_or_unevictable(page, vma);
+ page_add_new_anon_rmap(page, vma, address);
} else {
inc_mm_counter(mm, file_rss);
page_add_file_rmap(page);
@@ -2533,11 +2575,14 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
get_page(dirty_page);
}
}
+//TODO: is this safe? do_anonymous_page() does it this way.
+ set_pte_at(mm, address, page_table, entry);
/* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, address, entry);
} else {
- mem_cgroup_uncharge_page(page);
+ if (charged)
+ mem_cgroup_uncharge_page(page);
if (anon)
page_cache_release(page);
else
@@ -2772,19 +2817,9 @@ int make_pages_present(unsigned long addr, unsigned long end)
len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
ret = get_user_pages(current, current->mm, addr,
len, write, 0, NULL, NULL);
- if (ret < 0) {
- /*
- SUS require strange return value to mlock
- - invalid addr generate to ENOMEM.
- - out of memory should generate EAGAIN.
- */
- if (ret == -EFAULT)
- ret = -ENOMEM;
- else if (ret == -ENOMEM)
- ret = -EAGAIN;
+ if (ret < 0)
return ret;
- }
- return ret == len ? 0 : -ENOMEM;
+ return ret == len ? 0 : -EFAULT;
}
#if !defined(__HAVE_ARCH_GATE_AREA)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 89fee2dcb03..6837a101437 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -26,6 +26,7 @@
#include <linux/delay.h>
#include <linux/migrate.h>
#include <linux/page-isolation.h>
+#include <linux/pfn.h>
#include <asm/tlbflush.h>
@@ -323,11 +324,11 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
BUG_ON(nr_pages % PAGES_PER_SECTION);
- release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
-
sections_to_remove = nr_pages / PAGES_PER_SECTION;
for (i = 0; i < sections_to_remove; i++) {
unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+ release_mem_region(pfn << PAGE_SHIFT,
+ PAGES_PER_SECTION << PAGE_SHIFT);
ret = __remove_section(zone, __pfn_to_section(pfn));
if (ret)
break;
@@ -657,8 +658,9 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 * We can skip free pages. And we can only deal with pages on
 * the LRU.
*/
- ret = isolate_lru_page(page, &source);
+ ret = isolate_lru_page(page);
if (!ret) { /* Success */
+ list_add_tail(&page->lru, &source);
move_pages--;
} else {
/* Because we don't have big zone->lock. we should
@@ -849,10 +851,19 @@ failed_removal:
return ret;
}
+
+int remove_memory(u64 start, u64 size)
+{
+ unsigned long start_pfn, end_pfn;
+
+ start_pfn = PFN_DOWN(start);
+ end_pfn = start_pfn + PFN_DOWN(size);
+ return offline_pages(start_pfn, end_pfn, 120 * HZ);
+}
#else
int remove_memory(u64 start, u64 size)
{
return -EINVAL;
}
-EXPORT_SYMBOL_GPL(remove_memory);
#endif /* CONFIG_MEMORY_HOTREMOVE */
+EXPORT_SYMBOL_GPL(remove_memory);
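
The new remove_memory() converts byte addresses to page frame numbers with
PFN_DOWN() from <linux/pfn.h>, hence the added include. A sketch of the
helpers, paraphrased from <linux/pfn.h>:

	#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)		/* round down */
	#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

So, e.g., with 4 KiB pages, remove_memory(0x40000000, 0x10000000) offlines
pfns 0x40000 through 0x4ffff, with a 120 * HZ timeout for migrating away any
remaining pages.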
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 83369058ec1..36f42573a33 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -93,6 +93,8 @@
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
+#include "internal.h"
+
/* Internal flags */
#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */
#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */
@@ -762,8 +764,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
/*
* Avoid migrating a page that is shared with others.
*/
- if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1)
- isolate_lru_page(page, pagelist);
+ if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
+ if (!isolate_lru_page(page)) {
+ list_add_tail(&page->lru, pagelist);
+ }
+ }
}
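
isolate_lru_page() loses its list parameter in this series and moves to
mm/vmscan.c; batching callers such as migrate_page_add() above now chain
isolated pages themselves. A rough sketch of the relocated helper, assuming
the per-list LRU helpers this series introduces:

	int isolate_lru_page(struct page *page)
	{
		int ret = -EBUSY;

		if (PageLRU(page)) {
			struct zone *zone = page_zone(page);

			spin_lock_irq(&zone->lru_lock);
			if (PageLRU(page) && get_page_unless_zero(page)) {
				int lru = page_lru(page);
				ret = 0;
				ClearPageLRU(page);
				del_page_from_lru_list(zone, page, lru);
			}
			spin_unlock_irq(&zone->lru_lock);
		}
		return ret;	/* 0: isolated, reference held by caller */
	}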
static struct page *new_node_page(struct page *page, unsigned long node, int **x)
@@ -2197,7 +2202,7 @@ static void gather_stats(struct page *page, void *private, int pte_dirty)
if (PageSwapCache(page))
md->swapcache++;
- if (PageActive(page))
+ if (PageActive(page) || PageUnevictable(page))
md->active++;
if (PageWriteback(page))
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a80136b23b..6602941bfab 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -37,36 +37,6 @@
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
/*
- * Isolate one page from the LRU lists. If successful put it onto
- * the indicated list with elevated page count.
- *
- * Result:
- * -EBUSY: page not on LRU list
- * 0: page removed from LRU list and added to the specified list.
- */
-int isolate_lru_page(struct page *page, struct list_head *pagelist)
-{
- int ret = -EBUSY;
-
- if (PageLRU(page)) {
- struct zone *zone = page_zone(page);
-
- spin_lock_irq(&zone->lru_lock);
- if (PageLRU(page) && get_page_unless_zero(page)) {
- ret = 0;
- ClearPageLRU(page);
- if (PageActive(page))
- del_page_from_active_list(zone, page);
- else
- del_page_from_inactive_list(zone, page);
- list_add_tail(&page->lru, pagelist);
- }
- spin_unlock_irq(&zone->lru_lock);
- }
- return ret;
-}
-
-/*
* migrate_prep() needs to be called before we start compiling a list of pages
* to be migrated using isolate_lru_page().
*/
@@ -83,23 +53,9 @@ int migrate_prep(void)
return 0;
}
-static inline void move_to_lru(struct page *page)
-{
- if (PageActive(page)) {
- /*
- * lru_cache_add_active checks that
- * the PG_active bit is off.
- */
- ClearPageActive(page);
- lru_cache_add_active(page);
- } else {
- lru_cache_add(page);
- }
- put_page(page);
-}
-
/*
- * Add isolated pages on the list back to the LRU.
+ * Add isolated pages on the list back to the LRU under page lock
+ * to avoid leaking evictable pages back onto the unevictable list.
*
* returns the number of pages put back.
*/
@@ -111,7 +67,7 @@ int putback_lru_pages(struct list_head *l)
list_for_each_entry_safe(page, page2, l, lru) {
list_del(&page->lru);
- move_to_lru(page);
+ putback_lru_page(page);
count++;
}
return count;
@@ -374,8 +330,6 @@ static int migrate_page_move_mapping(struct address_space *mapping,
__inc_zone_page_state(newpage, NR_FILE_PAGES);
spin_unlock_irq(&mapping->tree_lock);
- if (!PageSwapCache(newpage))
- mem_cgroup_uncharge_cache_page(page);
return 0;
}
@@ -385,6 +339,8 @@ static int migrate_page_move_mapping(struct address_space *mapping,
*/
static void migrate_page_copy(struct page *newpage, struct page *page)
{
+ int anon;
+
copy_highpage(newpage, page);
if (PageError(page))
@@ -393,8 +349,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
SetPageReferenced(newpage);
if (PageUptodate(page))
SetPageUptodate(newpage);
- if (PageActive(page))
+ if (TestClearPageActive(page)) {
+ VM_BUG_ON(PageUnevictable(page));
SetPageActive(newpage);
+ } else
+ unevictable_migrate_page(newpage, page);
if (PageChecked(page))
SetPageChecked(newpage);
if (PageMappedToDisk(page))
@@ -412,14 +371,20 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
__set_page_dirty_nobuffers(newpage);
}
+ mlock_migrate_page(newpage, page);
+
#ifdef CONFIG_SWAP
ClearPageSwapCache(page);
#endif
- ClearPageActive(page);
ClearPagePrivate(page);
set_page_private(page, 0);
+ /* page->mapping contains a flag for PageAnon() */
+ anon = PageAnon(page);
page->mapping = NULL;
+ if (!anon) /* This page was removed from radix-tree. */
+ mem_cgroup_uncharge_cache_page(page);
+
/*
* If any waiters have accumulated on the new page then
* wake them up.
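
The new 'anon' latch is needed because PageAnon() is encoded in the low bit
of page->mapping (PAGE_MAPPING_ANON), so it must be sampled before
page->mapping is cleared. Roughly, from <linux/mm.h>:

	#define PAGE_MAPPING_ANON	1

	static inline int PageAnon(struct page *page)
	{
		return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
	}

Once page->mapping is NULLed, a file page can no longer be told apart from an
anonymous one, and the cache-page uncharge would be misapplied.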
@@ -594,6 +559,10 @@ static int fallback_migrate_page(struct address_space *mapping,
*
* The new page will have replaced the old page if this function
* is successful.
+ *
+ * Return value:
+ * < 0 - error code
+ * == 0 - success
*/
static int move_to_new_page(struct page *newpage, struct page *page)
{
@@ -611,6 +580,8 @@ static int move_to_new_page(struct page *newpage, struct page *page)
/* Prepare mapping for the new page.*/
newpage->index = page->index;
newpage->mapping = page->mapping;
+ if (PageSwapBacked(page))
+ SetPageSwapBacked(newpage);
mapping = page_mapping(page);
if (!mapping)
@@ -654,9 +625,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
if (!newpage)
return -ENOMEM;
- if (page_count(page) == 1)
+ if (page_count(page) == 1) {
/* page was freed from under us. So we are done. */
goto move_newpage;
+ }
charge = mem_cgroup_prepare_migration(page, newpage);
if (charge == -ENOMEM) {
@@ -730,7 +702,6 @@ rcu_unlock:
rcu_read_unlock();
unlock:
-
unlock_page(page);
if (rc != -EAGAIN) {
@@ -741,17 +712,19 @@ unlock:
* restored.
*/
list_del(&page->lru);
- move_to_lru(page);
+ putback_lru_page(page);
}
move_newpage:
if (!charge)
mem_cgroup_end_migration(newpage);
+
/*
* Move the new page to the LRU. If migration was not successful
* then this will free the page.
*/
- move_to_lru(newpage);
+ putback_lru_page(newpage);
+
if (result) {
if (rc)
*result = rc;
@@ -858,9 +831,11 @@ static struct page *new_page_node(struct page *p, unsigned long private,
* Move a set of pages as indicated in the pm array. The addr
* field must be set to the virtual address of the page to be moved
* and the node number must contain a valid target node.
+ * The pm array ends with node = MAX_NUMNODES.
*/
-static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
- int migrate_all)
+static int do_move_page_to_node_array(struct mm_struct *mm,
+ struct page_to_node *pm,
+ int migrate_all)
{
int err;
struct page_to_node *pp;
@@ -914,7 +889,9 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
!migrate_all)
goto put_and_set;
- err = isolate_lru_page(page, &pagelist);
+ err = isolate_lru_page(page);
+ if (!err)
+ list_add_tail(&page->lru, &pagelist);
put_and_set:
/*
* Either remove the duplicate refcount from
@@ -926,36 +903,118 @@ set_status:
pp->status = err;
}
+ err = 0;
if (!list_empty(&pagelist))
err = migrate_pages(&pagelist, new_page_node,
(unsigned long)pm);
- else
- err = -ENOENT;
up_read(&mm->mmap_sem);
return err;
}
/*
- * Determine the nodes of a list of pages. The addr in the pm array
- * must have been set to the virtual address of which we want to determine
- * the node number.
+ * Migrate an array of page addresses onto an array of nodes and fill
+ * in the corresponding array of status values.
*/
-static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
+static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
+ unsigned long nr_pages,
+ const void __user * __user *pages,
+ const int __user *nodes,
+ int __user *status, int flags)
{
+ struct page_to_node *pm = NULL;
+ nodemask_t task_nodes;
+ int err = 0;
+ int i;
+
+ task_nodes = cpuset_mems_allowed(task);
+
+ /* Limit nr_pages so that the multiplication may not overflow */
+ if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
+ err = -E2BIG;
+ goto out;
+ }
+
+ pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
+ if (!pm) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Get parameters from user space and initialize the pm
+ * array. Return various errors if the user did something wrong.
+ */
+ for (i = 0; i < nr_pages; i++) {
+ const void __user *p;
+
+ err = -EFAULT;
+ if (get_user(p, pages + i))
+ goto out_pm;
+
+ pm[i].addr = (unsigned long)p;
+ if (nodes) {
+ int node;
+
+ if (get_user(node, nodes + i))
+ goto out_pm;
+
+ err = -ENODEV;
+ if (!node_state(node, N_HIGH_MEMORY))
+ goto out_pm;
+
+ err = -EACCES;
+ if (!node_isset(node, task_nodes))
+ goto out_pm;
+
+ pm[i].node = node;
+ } else
+ pm[i].node = 0; /* anything to not match MAX_NUMNODES */
+ }
+ /* End marker */
+ pm[nr_pages].node = MAX_NUMNODES;
+
+ err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
+ if (err >= 0)
+ /* Return status information */
+ for (i = 0; i < nr_pages; i++)
+ if (put_user(pm[i].status, status + i))
+ err = -EFAULT;
+
+out_pm:
+ vfree(pm);
+out:
+ return err;
+}
+
+/*
+ * Determine the nodes of an array of pages and store them in an array of status.
+ */
+static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
+ const void __user * __user *pages,
+ int __user *status)
+{
+ unsigned long i;
+ int err;
+
down_read(&mm->mmap_sem);
- for ( ; pm->node != MAX_NUMNODES; pm++) {
+ for (i = 0; i < nr_pages; i++) {
+ const void __user *p;
+ unsigned long addr;
struct vm_area_struct *vma;
struct page *page;
- int err;
err = -EFAULT;
- vma = find_vma(mm, pm->addr);
+ if (get_user(p, pages+i))
+ goto out;
+ addr = (unsigned long) p;
+
+ vma = find_vma(mm, addr);
if (!vma)
goto set_status;
- page = follow_page(vma, pm->addr, 0);
+ page = follow_page(vma, addr, 0);
err = PTR_ERR(page);
if (IS_ERR(page))
@@ -968,11 +1027,13 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
err = page_to_nid(page);
set_status:
- pm->status = err;
+ put_user(err, status+i);
}
+ err = 0;
+out:
up_read(&mm->mmap_sem);
- return 0;
+ return err;
}
/*
@@ -984,12 +1045,9 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
const int __user *nodes,
int __user *status, int flags)
{
- int err = 0;
- int i;
struct task_struct *task;
- nodemask_t task_nodes;
struct mm_struct *mm;
- struct page_to_node *pm = NULL;
+ int err;
/* Check flags */
if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1021,75 +1079,21 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
(current->uid != task->suid) && (current->uid != task->uid) &&
!capable(CAP_SYS_NICE)) {
err = -EPERM;
- goto out2;
+ goto out;
}
err = security_task_movememory(task);
if (err)
- goto out2;
-
-
- task_nodes = cpuset_mems_allowed(task);
-
- /* Limit nr_pages so that the multiplication may not overflow */
- if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
- err = -E2BIG;
- goto out2;
- }
-
- pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
- if (!pm) {
- err = -ENOMEM;
- goto out2;
- }
-
- /*
- * Get parameters from user space and initialize the pm
- * array. Return various errors if the user did something wrong.
- */
- for (i = 0; i < nr_pages; i++) {
- const void __user *p;
-
- err = -EFAULT;
- if (get_user(p, pages + i))
- goto out;
-
- pm[i].addr = (unsigned long)p;
- if (nodes) {
- int node;
-
- if (get_user(node, nodes + i))
- goto out;
-
- err = -ENODEV;
- if (!node_state(node, N_HIGH_MEMORY))
- goto out;
-
- err = -EACCES;
- if (!node_isset(node, task_nodes))
- goto out;
+ goto out;
- pm[i].node = node;
- } else
- pm[i].node = 0; /* anything to not match MAX_NUMNODES */
+ if (nodes) {
+ err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
+ flags);
+ } else {
+ err = do_pages_stat(mm, nr_pages, pages, status);
}
- /* End marker */
- pm[nr_pages].node = MAX_NUMNODES;
-
- if (nodes)
- err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
- else
- err = do_pages_stat(mm, pm);
-
- if (err >= 0)
- /* Return status information */
- for (i = 0; i < nr_pages; i++)
- if (put_user(pm[i].status, status + i))
- err = -EFAULT;
out:
- vfree(pm);
-out2:
mmput(mm);
return err;
}
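
After this split, sys_move_pages() either migrates pages (nodes != NULL) or
merely queries their node placement (nodes == NULL). A hedged userspace
sketch using libnuma's move_pages(2) wrapper; the buffer and target node are
illustrative only:

	/* build with: cc demo.c -lnuma */
	#include <numaif.h>
	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		void *pages[1];
		int nodes[1] = { 0 };		/* target node 0 */
		int status[1];

		pages[0] = malloc(4096);
		((char *)pages[0])[0] = 1;	/* fault the page in first */

		/* pid 0 == calling process; MPOL_MF_MOVE only moves pages
		 * mapped by this process alone */
		if (move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE) < 0)
			perror("move_pages");
		else
			printf("status[0] = %d (node, or negative errno)\n",
			       status[0]);
		return 0;
	}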
diff --git a/mm/mlock.c b/mm/mlock.c
index 01fbe93eff5..008ea70b7af 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -8,10 +8,18 @@
#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/module.h>
+#include <linux/rmap.h>
+#include <linux/mmzone.h>
+#include <linux/hugetlb.h>
+
+#include "internal.h"
int can_do_mlock(void)
{
@@ -23,17 +31,381 @@ int can_do_mlock(void)
}
EXPORT_SYMBOL(can_do_mlock);
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * Mlocked pages are marked with PageMlocked() flag for efficient testing
+ * in vmscan and, possibly, the fault path; and to support semi-accurate
+ * statistics.
+ *
+ * An mlocked page [PageMlocked(page)] is unevictable. As such, it will
+ * be placed on the LRU "unevictable" list, rather than the [in]active lists.
+ * The unevictable list is an LRU sibling list to the [in]active lists.
+ * PageUnevictable is set to indicate the unevictable state.
+ *
+ * When lazy mlocking via vmscan, it is important to ensure that the
+ * vma's VM_LOCKED status is not concurrently being modified, otherwise we
+ * may have mlocked a page that is being munlocked. So lazy mlock must take
+ * the mmap_sem for read, and verify that the vma really is locked
+ * (see mm/rmap.c).
+ */
+
+/*
+ * LRU accounting for clear_page_mlock()
+ */
+void __clear_page_mlock(struct page *page)
+{
+ VM_BUG_ON(!PageLocked(page));
+
+ if (!page->mapping) { /* truncated ? */
+ return;
+ }
+
+ dec_zone_page_state(page, NR_MLOCK);
+ count_vm_event(UNEVICTABLE_PGCLEARED);
+ if (!isolate_lru_page(page)) {
+ putback_lru_page(page);
+ } else {
+ /*
+ * Page not on the LRU yet. Flush all pagevecs and retry.
+ */
+ lru_add_drain_all();
+ if (!isolate_lru_page(page))
+ putback_lru_page(page);
+ else if (PageUnevictable(page))
+ count_vm_event(UNEVICTABLE_PGSTRANDED);
+
+ }
+}
+
+/*
+ * Mark page as mlocked if not already.
+ * If page on LRU, isolate and putback to move to unevictable list.
+ */
+void mlock_vma_page(struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+
+ if (!TestSetPageMlocked(page)) {
+ inc_zone_page_state(page, NR_MLOCK);
+ count_vm_event(UNEVICTABLE_PGMLOCKED);
+ if (!isolate_lru_page(page))
+ putback_lru_page(page);
+ }
+}
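
mlock_vma_page() relies on the isolate/putback round trip: once PageMlocked
is set, putback_lru_page() notices the page is no longer evictable and files
it on the unevictable list. The conceptual core of that helper (the real
vmscan.c version re-checks and retries, elided here):

	int lru = !!TestClearPageActive(page);

	if (page_evictable(page, NULL)) {
		lru += page_is_file_cache(page);	/* anon vs file LRU */
		lru_cache_add_lru(page, lru);
	} else {
		/* sets PageUnevictable and links onto LRU_UNEVICTABLE */
		add_page_to_unevictable_list(page);
	}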
+
+/*
+ * called from munlock()/munmap() path with page supposedly on the LRU.
+ *
+ * Note: unlike mlock_vma_page(), we can't just clear the PageMlocked
+ * [in try_to_munlock()] and then attempt to isolate the page. We must
+ * isolate the page to keep others from messing with its unevictable
+ * and mlocked state while trying to munlock. However, we pre-clear the
+ * mlocked state anyway as we might lose the isolation race and we might
+ * not get another chance to clear PageMlocked. If we successfully
+ * isolate the page and try_to_munlock() detects other VM_LOCKED vmas
+ * mapping the page, it will restore the PageMlocked state, unless the page
+ * is mapped in a non-linear vma. So, we go ahead and SetPageMlocked(),
+ * perhaps redundantly.
+ * If we lose the isolation race, and the page is mapped by other VM_LOCKED
+ * vmas, we'll detect this in vmscan--via try_to_munlock() or try_to_unmap()
+ * either of which will restore the PageMlocked state by calling
+ * mlock_vma_page() above, if it can grab the vma's mmap sem.
+ */
+static void munlock_vma_page(struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+
+ if (TestClearPageMlocked(page)) {
+ dec_zone_page_state(page, NR_MLOCK);
+ if (!isolate_lru_page(page)) {
+ int ret = try_to_munlock(page);
+ /*
+ * did try_to_munlock() succeed or punt?
+ */
+ if (ret == SWAP_SUCCESS || ret == SWAP_AGAIN)
+ count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+
+ putback_lru_page(page);
+ } else {
+ /*
+ * We lost the race. Let try_to_unmap() deal
+ * with it. At least we get the page state and
+ * mlock stats right. However, the page is still on
+ * the unevictable list. We'll fix that up when
+ * the page is eventually freed or we scan the
+ * unevictable list.
+ */
+ if (PageUnevictable(page))
+ count_vm_event(UNEVICTABLE_PGSTRANDED);
+ else
+ count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+ }
+ }
+}
+
+/**
+ * __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma.
+ * @vma: target vma
+ * @start: start address
+ * @end: end address
+ * @mlock: 0 indicates munlock; otherwise mlock.
+ *
+ * If @mlock == 0, unlock an mlocked range;
+ * else mlock the range of pages. This takes care of making the pages
+ * present, too.
+ *
+ * return 0 on success, negative error code on error.
+ *
+ * vma->vm_mm->mmap_sem must be held for at least read.
+ */
+static long __mlock_vma_pages_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ int mlock)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long addr = start;
+ struct page *pages[16]; /* 16 gives a reasonable batch */
+ int nr_pages = (end - start) / PAGE_SIZE;
+ int ret;
+ int gup_flags = 0;
+
+ VM_BUG_ON(start & ~PAGE_MASK);
+ VM_BUG_ON(end & ~PAGE_MASK);
+ VM_BUG_ON(start < vma->vm_start);
+ VM_BUG_ON(end > vma->vm_end);
+ VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) &&
+ (atomic_read(&mm->mm_users) != 0));
+
+ /*
+ * mlock: don't populate pages that have PROT_NONE permission.
+ * munlock: always munlock the pages, even when they have
+ * PROT_NONE permission.
+ */
+ if (!mlock)
+ gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS;
+
+ if (vma->vm_flags & VM_WRITE)
+ gup_flags |= GUP_FLAGS_WRITE;
+
+ lru_add_drain_all(); /* push cached pages to LRU */
+
+ while (nr_pages > 0) {
+ int i;
+
+ cond_resched();
+
+ /*
+ * get_user_pages makes pages present if we are
+ * setting mlock. and this extra reference count will
+ * disable migration of this page. However, page may
+ * still be truncated out from under us.
+ */
+ ret = __get_user_pages(current, mm, addr,
+ min_t(int, nr_pages, ARRAY_SIZE(pages)),
+ gup_flags, pages, NULL);
+ /*
+ * This can happen for, e.g., VM_NONLINEAR regions before
+ * a page has been allocated and mapped at a given offset,
+ * or for addresses that map beyond end of a file.
+ * We'll mlock the pages if/when they get faulted in.
+ */
+ if (ret < 0)
+ break;
+ if (ret == 0) {
+ /*
+ * We know the vma is there, so the only time
+ * we cannot get a single page should be an
+ * error (ret < 0) case.
+ */
+ WARN_ON(1);
+ break;
+ }
+
+ lru_add_drain(); /* push cached pages to LRU */
+
+ for (i = 0; i < ret; i++) {
+ struct page *page = pages[i];
+
+ lock_page(page);
+ /*
+ * Because we lock page here and migration is blocked
+ * by the elevated reference, we need only check for
+ * page truncation (file-cache only).
+ */
+ if (page->mapping) {
+ if (mlock)
+ mlock_vma_page(page);
+ else
+ munlock_vma_page(page);
+ }
+ unlock_page(page);
+ put_page(page); /* ref from get_user_pages() */
+
+ /*
+ * here we assume that get_user_pages() has given us
+ * a list of virtually contiguous pages.
+ */
+ addr += PAGE_SIZE; /* for next get_user_pages() */
+ nr_pages--;
+ }
+ ret = 0;
+ }
+
+ lru_add_drain_all(); /* to update stats */
+
+ return ret; /* count entire vma as locked_vm */
+}
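
The gup_flags built above come from mm/internal.h; in this series they are,
roughly:

	#define GUP_FLAGS_WRITE			 0x1
	#define GUP_FLAGS_FORCE			 0x2
	#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4

IGNORE_VMA_PERMISSIONS is what lets munlock reach PROT_NONE pages: they must
be found and un-mlocked even though the task can no longer fault them in. The
pages[16] batch also bounds stack use; mlocking 64 MiB of 4 KiB pages takes
1024 trips through the loop, with cond_resched() between batches.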
+
+/*
+ * convert get_user_pages() return value to posix mlock() error
+ */
+static int __mlock_posix_error_return(long retval)
+{
+ if (retval == -EFAULT)
+ retval = -ENOMEM;
+ else if (retval == -ENOMEM)
+ retval = -EAGAIN;
+ return retval;
+}
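
The translation exists because POSIX specifies mlock() failures differently
from what get_user_pages() reports: an address range that is not fully mapped
must yield ENOMEM rather than EFAULT, and running out of memory maps to
EAGAIN. A caller-side illustration (hypothetical):

	ret = __mlock_vma_pages_range(vma, start, end, 1);
	if (ret < 0)
		ret = __mlock_posix_error_return(ret);
	/* -EFAULT -> -ENOMEM: range not (fully) mapped
	 * -ENOMEM -> -EAGAIN: could not lock some pages right now */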
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+
+/*
+ * Just make pages present if VM_LOCKED. No-op if unlocking.
+ */
+static long __mlock_vma_pages_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ int mlock)
+{
+ if (mlock && (vma->vm_flags & VM_LOCKED))
+ return make_pages_present(start, end);
+ return 0;
+}
+
+static inline int __mlock_posix_error_return(long retval)
+{
+ return 0;
+}
+
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
+/**
+ * mlock_vma_pages_range() - mlock pages in specified vma range.
+ * @vma - the vma containing the specified address range
+ * @start - starting address in @vma to mlock
+ * @end - end address [+1] in @vma to mlock
+ *
+ * For mmap()/mremap()/expansion of mlocked vma.
+ *
+ * return 0 on success for "normal" vmas.
+ *
+ * return number of pages [> 0] to be removed from locked_vm on success
+ * of "special" vmas.
+ *
+ * return negative error if vma spanning @start-@end disappears while
+ * mmap semaphore is dropped. Unlikely?
+ */
+long mlock_vma_pages_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ int nr_pages = (end - start) / PAGE_SIZE;
+ BUG_ON(!(vma->vm_flags & VM_LOCKED));
+
+ /*
+ * filter unlockable vmas
+ */
+ if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+ goto no_mlock;
+
+ if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
+ is_vm_hugetlb_page(vma) ||
+ vma == get_gate_vma(current))) {
+ long error;
+ downgrade_write(&mm->mmap_sem);
+
+ error = __mlock_vma_pages_range(vma, start, end, 1);
+
+ up_read(&mm->mmap_sem);
+ /* vma can change or disappear */
+ down_write(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ /* non-NULL vma must contain @start, but need to check @end */
+ if (!vma || end > vma->vm_end)
+ return -ENOMEM;
+
+ return 0; /* hide other errors from mmap(), et al */
+ }
+
+ /*
+ * User mapped kernel pages or huge pages:
+ * make these pages present to populate the ptes, but
+ * fall thru' to reset VM_LOCKED--no need to unlock, and
+ * return nr_pages so these don't get counted against task's
+ * locked limit. huge pages are already counted against
+ * locked vm limit.
+ */
+ make_pages_present(start, end);
+
+no_mlock:
+ vma->vm_flags &= ~VM_LOCKED; /* and don't come back! */
+ return nr_pages; /* error or pages NOT mlocked */
+}
+
+
+/*
+ * munlock_vma_pages_range() - munlock all pages in the vma range.
+ * @vma - vma containing range to be munlock()ed.
+ * @start - start address in @vma of the range
+ * @end - end of range in @vma.
+ *
+ * For mremap(), munmap() and exit().
+ *
+ * Called with @vma VM_LOCKED.
+ *
+ * Returns with VM_LOCKED cleared. Callers must be prepared to
+ * deal with this.
+ *
+ * We don't save and restore VM_LOCKED here because pages are
+ * still on lru. In unmap path, pages might be scanned by reclaim
+ * and re-mlocked by try_to_{munlock|unmap} before we unmap and
+ * free them. This will result in freeing mlocked pages.
+ */
+void munlock_vma_pages_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ vma->vm_flags &= ~VM_LOCKED;
+ __mlock_vma_pages_range(vma, start, end, 0);
+}
+
+/*
+ * mlock_fixup - handle mlock[all]/munlock[all] requests.
+ *
+ * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
+ * munlock is a no-op. However, for some special vmas, we go ahead and
+ * populate the ptes via make_pages_present().
+ *
+ * For vmas that pass the filters, merge/split as appropriate.
+ */
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
unsigned long start, unsigned long end, unsigned int newflags)
{
- struct mm_struct * mm = vma->vm_mm;
+ struct mm_struct *mm = vma->vm_mm;
pgoff_t pgoff;
- int pages;
+ int nr_pages;
int ret = 0;
-
- if (newflags == vma->vm_flags) {
- *prev = vma;
- goto out;
+ int lock = newflags & VM_LOCKED;
+
+ if (newflags == vma->vm_flags ||
+ (vma->vm_flags & (VM_IO | VM_PFNMAP)))
+ goto out; /* don't set VM_LOCKED, don't count */
+
+ if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
+ is_vm_hugetlb_page(vma) ||
+ vma == get_gate_vma(current)) {
+ if (lock)
+ make_pages_present(start, end);
+ goto out; /* don't set VM_LOCKED, don't count */
}
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
@@ -44,8 +416,6 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
goto success;
}
- *prev = vma;
-
if (start != vma->vm_start) {
ret = split_vma(mm, vma, start, 1);
if (ret)
@@ -60,24 +430,61 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
success:
/*
+ * Keep track of amount of locked VM.
+ */
+ nr_pages = (end - start) >> PAGE_SHIFT;
+ if (!lock)
+ nr_pages = -nr_pages;
+ mm->locked_vm += nr_pages;
+
+ /*
* vm_flags is protected by the mmap_sem held in write mode.
* It's okay if try_to_unmap_one unmaps a page just after we
- * set VM_LOCKED, make_pages_present below will bring it back.
+ * set VM_LOCKED; __mlock_vma_pages_range will bring it back.
*/
vma->vm_flags = newflags;
- /*
- * Keep track of amount of locked VM.
- */
- pages = (end - start) >> PAGE_SHIFT;
- if (newflags & VM_LOCKED) {
- pages = -pages;
- if (!(newflags & VM_IO))
- ret = make_pages_present(start, end);
+ if (lock) {
+ /*
+ * mmap_sem is currently held for write. Downgrade the write
+ * lock to a read lock so that other faults, mmap scans, etc. can
+ * proceed while we fault in all pages.
+ */
+ downgrade_write(&mm->mmap_sem);
+
+ ret = __mlock_vma_pages_range(vma, start, end, 1);
+
+ /*
+ * Need to reacquire mmap sem in write mode, as our callers
+ * expect this. We have no support for atomically upgrading
+ * a sem to write, so we need to check for ranges while sem
+ * is unlocked.
+ */
+ up_read(&mm->mmap_sem);
+ /* vma can change or disappear */
+ down_write(&mm->mmap_sem);
+ *prev = find_vma(mm, start);
+ /* non-NULL *prev must contain @start, but need to check @end */
+ if (!(*prev) || end > (*prev)->vm_end)
+ ret = -ENOMEM;
+ else if (ret > 0) {
+ mm->locked_vm -= ret;
+ ret = 0;
+ } else
+ ret = __mlock_posix_error_return(ret); /* translate if needed */
+ } else {
+ /*
+ * TODO: for unlocking, pages will already be resident, so
+ * we don't need to wait for allocations/reclaim/pagein, ...
+ * However, unlocking a very large region can still take a
+ * while. Should we downgrade the semaphore for both lock
+ * AND unlock ?
+ */
+ __mlock_vma_pages_range(vma, start, end, 0);
}
- mm->locked_vm -= pages;
out:
+ *prev = vma;
return ret;
}
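
Both mlock_fixup() and mlock_vma_pages_range() juggle mmap_sem the same way,
since faulting in a large range under the write lock would stall every other
thread of the mm. The invariant, in outline:

	down_write(&mm->mmap_sem);		/* caller */
	...
	downgrade_write(&mm->mmap_sem);		/* write -> read, atomically */
	__mlock_vma_pages_range(...);		/* fault pages under read lock */
	up_read(&mm->mmap_sem);
	down_write(&mm->mmap_sem);		/* re-take for the caller... */
	vma = find_vma(mm, start);		/* ...and re-validate: the vma
						   may have changed or gone */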
diff --git a/mm/mmap.c b/mm/mmap.c
index e7a5a68a9c2..74f4d158022 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -410,7 +410,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
rb_insert_color(&vma->vm_rb, &mm->mm_rb);
}
-static inline void __vma_link_file(struct vm_area_struct *vma)
+static void __vma_link_file(struct vm_area_struct *vma)
{
struct file * file;
@@ -662,8 +662,6 @@ again: remove_next = 1 + (end > next->vm_end);
* If the vma has a ->close operation then the driver probably needs to release
* per-vma resources, so we don't attempt to merge those.
*/
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
-
static inline int is_mergeable_vma(struct vm_area_struct *vma,
struct file *file, unsigned long vm_flags)
{
@@ -972,6 +970,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
return -EPERM;
vm_flags |= VM_LOCKED;
}
+
/* mlock MCL_FUTURE? */
if (vm_flags & VM_LOCKED) {
unsigned long locked, lock_limit;
@@ -1139,10 +1138,12 @@ munmap_back:
* The VM_SHARED test is necessary because shmem_zero_setup
* will create the file object for a shared anonymous map below.
*/
- if (!file && !(vm_flags & VM_SHARED) &&
- vma_merge(mm, prev, addr, addr + len, vm_flags,
- NULL, NULL, pgoff, NULL))
- goto out;
+ if (!file && !(vm_flags & VM_SHARED)) {
+ vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
+ NULL, NULL, pgoff, NULL);
+ if (vma)
+ goto out;
+ }
/*
* Determine the object being mapped and call the appropriate
@@ -1224,10 +1225,14 @@ out:
mm->total_vm += len >> PAGE_SHIFT;
vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
- mm->locked_vm += len >> PAGE_SHIFT;
- make_pages_present(addr, addr + len);
- }
- if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
+ /*
+ * makes pages present; downgrades, drops, reacquires mmap_sem
+ */
+ long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
+ if (nr_pages < 0)
+ return nr_pages; /* vma gone! */
+ mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
+ } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
make_pages_present(addr, addr + len);
return addr;
@@ -1586,7 +1591,7 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
* vma is the last one with address > vma->vm_end. Have to extend vma.
*/
#ifndef CONFIG_IA64
-static inline
+static
#endif
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
@@ -1636,7 +1641,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
/*
* vma is the first one with address < vma->vm_start. Have to extend vma.
*/
-static inline int expand_downwards(struct vm_area_struct *vma,
+static int expand_downwards(struct vm_area_struct *vma,
unsigned long address)
{
int error;
@@ -1698,10 +1703,12 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
vma = find_vma_prev(mm, addr, &prev);
if (vma && (vma->vm_start <= addr))
return vma;
- if (!prev || expand_stack(prev, addr))
+ if (expand_stack(prev, addr))
return NULL;
- if (prev->vm_flags & VM_LOCKED)
- make_pages_present(addr, prev->vm_end);
+ if (prev->vm_flags & VM_LOCKED) {
+ if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
+ return NULL; /* vma gone! */
+ }
return prev;
}
#else
@@ -1727,8 +1734,10 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
start = vma->vm_start;
if (expand_stack(vma, addr))
return NULL;
- if (vma->vm_flags & VM_LOCKED)
- make_pages_present(addr, start);
+ if (vma->vm_flags & VM_LOCKED) {
+ if (mlock_vma_pages_range(vma, addr, start) < 0)
+ return NULL; /* vma gone! */
+ }
return vma;
}
#endif
@@ -1747,8 +1756,6 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
long nrpages = vma_pages(vma);
mm->total_vm -= nrpages;
- if (vma->vm_flags & VM_LOCKED)
- mm->locked_vm -= nrpages;
vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
vma = remove_vma(vma);
} while (vma);
@@ -1914,6 +1921,20 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
vma = prev? prev->vm_next: mm->mmap;
/*
+ * unlock any mlock()ed ranges before detaching vmas
+ */
+ if (mm->locked_vm) {
+ struct vm_area_struct *tmp = vma;
+ while (tmp && tmp->vm_start < end) {
+ if (tmp->vm_flags & VM_LOCKED) {
+ mm->locked_vm -= vma_pages(tmp);
+ munlock_vma_pages_all(tmp);
+ }
+ tmp = tmp->vm_next;
+ }
+ }
+
+ /*
* Remove the vma's, and unmap the actual pages
*/
detach_vmas_to_be_unmapped(mm, vma, prev, end);
@@ -2025,8 +2046,9 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
return -ENOMEM;
/* Can we just expand an old private anonymous mapping? */
- if (vma_merge(mm, prev, addr, addr + len, flags,
- NULL, NULL, pgoff, NULL))
+ vma = vma_merge(mm, prev, addr, addr + len, flags,
+ NULL, NULL, pgoff, NULL);
+ if (vma)
goto out;
/*
@@ -2048,8 +2070,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
out:
mm->total_vm += len >> PAGE_SHIFT;
if (flags & VM_LOCKED) {
- mm->locked_vm += len >> PAGE_SHIFT;
- make_pages_present(addr, addr + len);
+ if (!mlock_vma_pages_range(vma, addr, addr + len))
+ mm->locked_vm += (len >> PAGE_SHIFT);
}
return addr;
}
@@ -2060,7 +2082,7 @@ EXPORT_SYMBOL(do_brk);
void exit_mmap(struct mm_struct *mm)
{
struct mmu_gather *tlb;
- struct vm_area_struct *vma = mm->mmap;
+ struct vm_area_struct *vma;
unsigned long nr_accounted = 0;
unsigned long end;
@@ -2068,6 +2090,15 @@ void exit_mmap(struct mm_struct *mm)
arch_exit_mmap(mm);
mmu_notifier_release(mm);
+ if (mm->locked_vm) {
+ vma = mm->mmap;
+ while (vma) {
+ if (vma->vm_flags & VM_LOCKED)
+ munlock_vma_pages_all(vma);
+ vma = vma->vm_next;
+ }
+ }
+ vma = mm->mmap;
lru_add_drain();
flush_cache_mm(mm);
tlb = tlb_gather_mmu(mm, 1);
diff --git a/mm/mremap.c b/mm/mremap.c
index 1a7743923c8..58a2908f42f 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -24,6 +24,8 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#include "internal.h"
+
static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
@@ -238,8 +240,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
if (vm_flags & VM_LOCKED) {
mm->locked_vm += new_len >> PAGE_SHIFT;
if (new_len > old_len)
- make_pages_present(new_addr + old_len,
- new_addr + new_len);
+ mlock_vma_pages_range(new_vma, new_addr + old_len,
+ new_addr + new_len);
}
return new_addr;
@@ -379,7 +381,7 @@ unsigned long do_mremap(unsigned long addr,
vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
if (vma->vm_flags & VM_LOCKED) {
mm->locked_vm += pages;
- make_pages_present(addr + old_len,
+ mlock_vma_pages_range(vma, addr + old_len,
addr + new_len);
}
ret = addr;
diff --git a/mm/nommu.c b/mm/nommu.c
index ed75bc962fb..2696b24f2bb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -34,6 +34,8 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include "internal.h"
+
void *high_memory;
struct page *mem_map;
unsigned long max_mapnr;
@@ -128,20 +130,16 @@ unsigned int kobjsize(const void *objp)
return PAGE_SIZE << compound_order(page);
}
-/*
- * get a list of pages in an address range belonging to the specified process
- * and indicate the VMA that covers each page
- * - this is potentially dodgy as we may end incrementing the page count of a
- * slab page or a secondary page from a compound page
- * - don't permit access to VMAs that don't support it, such as I/O mappings
- */
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long start, int len, int write, int force,
- struct page **pages, struct vm_area_struct **vmas)
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int len, int flags,
+ struct page **pages, struct vm_area_struct **vmas)
{
struct vm_area_struct *vma;
unsigned long vm_flags;
int i;
+ int write = !!(flags & GUP_FLAGS_WRITE);
+ int force = !!(flags & GUP_FLAGS_FORCE);
+ int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
/* calculate required read or write permissions.
* - if 'force' is set, we only require the "MAY" flags.
@@ -156,7 +154,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
/* protect what we can, including chardevs */
if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
- !(vm_flags & vma->vm_flags))
+ (!ignore && !(vm_flags & vma->vm_flags)))
goto finish_or_fault;
if (pages) {
@@ -174,6 +172,30 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
finish_or_fault:
return i ? : -EFAULT;
}
+
+
+/*
+ * get a list of pages in an address range belonging to the specified process
+ * and indicate the VMA that covers each page
+ * - this is potentially dodgy as we may end up incrementing the page count of a
+ * slab page or a secondary page from a compound page
+ * - don't permit access to VMAs that don't support it, such as I/O mappings
+ */
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int len, int write, int force,
+ struct page **pages, struct vm_area_struct **vmas)
+{
+ int flags = 0;
+
+ if (write)
+ flags |= GUP_FLAGS_WRITE;
+ if (force)
+ flags |= GUP_FLAGS_FORCE;
+
+ return __get_user_pages(tsk, mm,
+ start, len, flags,
+ pages, vmas);
+}
EXPORT_SYMBOL(get_user_pages);
DEFINE_RWLOCK(vmlist_lock);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b40f6d5f8fe..2970e35fd03 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -329,9 +329,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
struct zone *z =
&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
- x += zone_page_state(z, NR_FREE_PAGES)
- + zone_page_state(z, NR_INACTIVE)
- + zone_page_state(z, NR_ACTIVE);
+ x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
}
/*
* Make sure that the number of highmem pages is never larger
@@ -355,9 +353,7 @@ unsigned long determine_dirtyable_memory(void)
{
unsigned long x;
- x = global_page_state(NR_FREE_PAGES)
- + global_page_state(NR_INACTIVE)
- + global_page_state(NR_ACTIVE);
+ x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
if (!vm_highmem_is_dirtyable)
x -= highmem_dirtyable_memory(x);
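
zone_lru_pages() and global_lru_pages() sum the evictable LRU counters that
replace NR_ACTIVE/NR_INACTIVE; unevictable pages are deliberately excluded,
since they can never be reclaimed to make room for dirty data. A sketch
consistent with this series:

	unsigned long global_lru_pages(void)
	{
		return global_page_state(NR_ACTIVE_ANON)
			+ global_page_state(NR_ACTIVE_FILE)
			+ global_page_state(NR_INACTIVE_ANON)
			+ global_page_state(NR_INACTIVE_FILE);
	}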
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9eb9eb92828..d0a240fbb8b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -44,7 +44,7 @@
#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
#include <linux/page-isolation.h>
-#include <linux/memcontrol.h>
+#include <linux/page_cgroup.h>
#include <linux/debugobjects.h>
#include <asm/tlbflush.h>
@@ -223,17 +223,12 @@ static inline int bad_range(struct zone *zone, struct page *page)
static void bad_page(struct page *page)
{
- void *pc = page_get_page_cgroup(page);
-
printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
"page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
current->comm, page, (int)(2*sizeof(unsigned long)),
(unsigned long)page->flags, page->mapping,
page_mapcount(page), page_count(page));
- if (pc) {
- printk(KERN_EMERG "cgroup:%p\n", pc);
- page_reset_bad_cgroup(page);
- }
+
printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
KERN_EMERG "Backtrace:\n");
dump_stack();
@@ -454,14 +449,16 @@ static inline void __free_one_page(struct page *page,
static inline int free_pages_check(struct page *page)
{
+ free_page_mlock(page);
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
- (page_get_page_cgroup(page) != NULL) |
(page_count(page) != 0) |
(page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
bad_page(page);
if (PageDirty(page))
__ClearPageDirty(page);
+ if (PageSwapBacked(page))
+ __ClearPageSwapBacked(page);
/*
* For now, we report if PG_reserved was found set, but do not
* clear it, and do not free the page. But we shall soon need
@@ -600,7 +597,6 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
{
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
- (page_get_page_cgroup(page) != NULL) |
(page_count(page) != 0) |
(page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
bad_page(page);
@@ -614,7 +610,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
1 << PG_referenced | 1 << PG_arch_1 |
- 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
+ 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
+#ifdef CONFIG_UNEVICTABLE_LRU
+ | 1 << PG_mlocked
+#endif
+ );
set_page_private(page, 0);
set_page_refcounted(page);
@@ -1862,10 +1862,21 @@ void show_free_areas(void)
}
}
- printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+ printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
+ " inactive_file:%lu"
+//TODO: check/adjust line lengths
+#ifdef CONFIG_UNEVICTABLE_LRU
+ " unevictable:%lu"
+#endif
+ " dirty:%lu writeback:%lu unstable:%lu\n"
" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
- global_page_state(NR_ACTIVE),
- global_page_state(NR_INACTIVE),
+ global_page_state(NR_ACTIVE_ANON),
+ global_page_state(NR_ACTIVE_FILE),
+ global_page_state(NR_INACTIVE_ANON),
+ global_page_state(NR_INACTIVE_FILE),
+#ifdef CONFIG_UNEVICTABLE_LRU
+ global_page_state(NR_UNEVICTABLE),
+#endif
global_page_state(NR_FILE_DIRTY),
global_page_state(NR_WRITEBACK),
global_page_state(NR_UNSTABLE_NFS),
@@ -1888,8 +1899,13 @@ void show_free_areas(void)
" min:%lukB"
" low:%lukB"
" high:%lukB"
- " active:%lukB"
- " inactive:%lukB"
+ " active_anon:%lukB"
+ " inactive_anon:%lukB"
+ " active_file:%lukB"
+ " inactive_file:%lukB"
+#ifdef CONFIG_UNEVICTABLE_LRU
+ " unevictable:%lukB"
+#endif
" present:%lukB"
" pages_scanned:%lu"
" all_unreclaimable? %s"
@@ -1899,8 +1915,13 @@ void show_free_areas(void)
K(zone->pages_min),
K(zone->pages_low),
K(zone->pages_high),
- K(zone_page_state(zone, NR_ACTIVE)),
- K(zone_page_state(zone, NR_INACTIVE)),
+ K(zone_page_state(zone, NR_ACTIVE_ANON)),
+ K(zone_page_state(zone, NR_INACTIVE_ANON)),
+ K(zone_page_state(zone, NR_ACTIVE_FILE)),
+ K(zone_page_state(zone, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+ K(zone_page_state(zone, NR_UNEVICTABLE)),
+#endif
K(zone->present_pages),
zone->pages_scanned,
(zone_is_all_unreclaimable(zone) ? "yes" : "no")
@@ -3410,10 +3431,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
pgdat->nr_zones = 0;
init_waitqueue_head(&pgdat->kswapd_wait);
pgdat->kswapd_max_order = 0;
+ pgdat_page_cgroup_init(pgdat);
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize, memmap_pages;
+ enum lru_list l;
size = zone_spanned_pages_in_node(nid, j, zones_size);
realsize = size - zone_absent_pages_in_node(nid, j,
@@ -3428,8 +3451,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
if (realsize >= memmap_pages) {
realsize -= memmap_pages;
- mminit_dprintk(MMINIT_TRACE, "memmap_init",
- "%s zone: %lu pages used for memmap\n",
+ printk(KERN_DEBUG
+ " %s zone: %lu pages used for memmap\n",
zone_names[j], memmap_pages);
} else
printk(KERN_WARNING
@@ -3439,8 +3462,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
/* Account for reserved pages */
if (j == 0 && realsize > dma_reserve) {
realsize -= dma_reserve;
- mminit_dprintk(MMINIT_TRACE, "memmap_init",
- "%s zone: %lu pages reserved\n",
+ printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
zone_names[0], dma_reserve);
}
@@ -3465,10 +3487,14 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
zone->prev_priority = DEF_PRIORITY;
zone_pcp_init(zone);
- INIT_LIST_HEAD(&zone->active_list);
- INIT_LIST_HEAD(&zone->inactive_list);
- zone->nr_scan_active = 0;
- zone->nr_scan_inactive = 0;
+ for_each_lru(l) {
+ INIT_LIST_HEAD(&zone->lru[l].list);
+ zone->lru[l].nr_scan = 0;
+ }
+ zone->recent_rotated[0] = 0;
+ zone->recent_rotated[1] = 0;
+ zone->recent_scanned[0] = 0;
+ zone->recent_scanned[1] = 0;
zap_zone_vm_stats(zone);
zone->flags = 0;
if (!size)
@@ -4210,7 +4236,7 @@ void setup_per_zone_pages_min(void)
for_each_zone(zone) {
u64 tmp;
- spin_lock_irqsave(&zone->lru_lock, flags);
+ spin_lock_irqsave(&zone->lock, flags);
tmp = (u64)pages_min * zone->present_pages;
do_div(tmp, lowmem_pages);
if (is_highmem(zone)) {
@@ -4242,13 +4268,53 @@ void setup_per_zone_pages_min(void)
zone->pages_low = zone->pages_min + (tmp >> 2);
zone->pages_high = zone->pages_min + (tmp >> 1);
setup_zone_migrate_reserve(zone);
- spin_unlock_irqrestore(&zone->lru_lock, flags);
+ spin_unlock_irqrestore(&zone->lock, flags);
}
/* update totalreserve_pages */
calculate_totalreserve_pages();
}
+/**
+ * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
+ *
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
+ * INACTIVE_ANON pages on this zone's LRU, maintained by the
+ * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
+ * the anonymous pages are kept on the inactive list.
+ *
+ * total target max
+ * memory ratio inactive anon
+ * -------------------------------------
+ * 10MB 1 5MB
+ * 100MB 1 50MB
+ * 1GB 3 250MB
+ * 10GB 10 0.9GB
+ * 100GB 31 3GB
+ * 1TB 101 10GB
+ * 10TB 320 32GB
+ */
+void setup_per_zone_inactive_ratio(void)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ unsigned int gb, ratio;
+
+ /* Zone size in gigabytes */
+ gb = zone->present_pages >> (30 - PAGE_SHIFT);
+ ratio = int_sqrt(10 * gb);
+ if (!ratio)
+ ratio = 1;
+
+ zone->inactive_ratio = ratio;
+ }
+}
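
Working one row of the table: a 1 GB zone has gb = 1, so ratio =
int_sqrt(10 * 1) = 3 (int_sqrt rounds 3.16 down). The pageout code then aims
to keep at least anon / (1 + ratio) = 25% of anonymous pages inactive, which
is the ~250MB shown:

	gb    = 1;			/* 1 GB zone */
	ratio = int_sqrt(10 * gb);	/* = 3 */
	/* target inactive anon >= present / (1 + 3) = 25% ~= 250 MB */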
+
/*
* Initialise min_free_kbytes.
*
@@ -4286,6 +4352,7 @@ static int __init init_per_zone_pages_min(void)
min_free_kbytes = 65536;
setup_per_zone_pages_min();
setup_per_zone_lowmem_reserve();
+ setup_per_zone_inactive_ratio();
return 0;
}
module_init(init_per_zone_pages_min)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
new file mode 100644
index 00000000000..5d86550701f
--- /dev/null
+++ b/mm/page_cgroup.c
@@ -0,0 +1,237 @@
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/bit_spinlock.h>
+#include <linux/page_cgroup.h>
+#include <linux/hash.h>
+#include <linux/memory.h>
+
+static void __meminit
+__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+{
+ pc->flags = 0;
+ pc->mem_cgroup = NULL;
+ pc->page = pfn_to_page(pfn);
+}
+static unsigned long total_usage;
+
+#if !defined(CONFIG_SPARSEMEM)
+
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+ pgdat->node_page_cgroup = NULL;
+}
+
+struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+ unsigned long pfn = page_to_pfn(page);
+ unsigned long offset;
+ struct page_cgroup *base;
+
+ base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
+ if (unlikely(!base))
+ return NULL;
+
+ offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
+ return base + offset;
+}
+
+static int __init alloc_node_page_cgroup(int nid)
+{
+ struct page_cgroup *base, *pc;
+ unsigned long table_size;
+ unsigned long start_pfn, nr_pages, index;
+
+ start_pfn = NODE_DATA(nid)->node_start_pfn;
+ nr_pages = NODE_DATA(nid)->node_spanned_pages;
+
+ table_size = sizeof(struct page_cgroup) * nr_pages;
+
+ base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
+ table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+ if (!base)
+ return -ENOMEM;
+ for (index = 0; index < nr_pages; index++) {
+ pc = base + index;
+ __init_page_cgroup(pc, start_pfn + index);
+ }
+ NODE_DATA(nid)->node_page_cgroup = base;
+ total_usage += table_size;
+ return 0;
+}
+
+void __init page_cgroup_init(void)
+{
+
+ int nid, fail;
+
+ for_each_online_node(nid) {
+ fail = alloc_node_page_cgroup(nid);
+ if (fail)
+ goto fail;
+ }
+ printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
+ printk(KERN_INFO "please try cgroup_disable=memory option if you"
+ " don't want\n");
+ return;
+fail:
+ printk(KERN_CRIT "allocation of page_cgroup was failed.\n");
+ printk(KERN_CRIT "please try cgroup_disable=memory boot option\n");
+ panic("Out of memory");
+}
+
+#else /* CONFIG_FLAT_NODE_MEM_MAP */
+
+struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+ unsigned long pfn = page_to_pfn(page);
+ struct mem_section *section = __pfn_to_section(pfn);
+
+ return section->page_cgroup + pfn;
+}
+
+int __meminit init_section_page_cgroup(unsigned long pfn)
+{
+ struct mem_section *section;
+ struct page_cgroup *base, *pc;
+ unsigned long table_size;
+ int nid, index;
+
+ section = __pfn_to_section(pfn);
+
+ if (section->page_cgroup)
+ return 0;
+
+ nid = page_to_nid(pfn_to_page(pfn));
+
+ table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+ base = kmalloc_node(table_size, GFP_KERNEL, nid);
+ if (!base)
+ base = vmalloc_node(table_size, nid);
+
+ if (!base) {
+ printk(KERN_ERR "page cgroup allocation failure\n");
+ return -ENOMEM;
+ }
+
+ for (index = 0; index < PAGES_PER_SECTION; index++) {
+ pc = base + index;
+ __init_page_cgroup(pc, pfn + index);
+ }
+
+ section = __pfn_to_section(pfn);
+ section->page_cgroup = base - pfn;
+ total_usage += table_size;
+ return 0;
+}
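
The assignment section->page_cgroup = base - pfn biases the stored pointer by
the section's first pfn, so that lookup_page_cgroup() can index with a raw
pfn and no subtraction:

	/* store:  section->page_cgroup = base - start_pfn
	 * lookup: pc = section->page_cgroup + pfn
	 *            = base + (pfn - start_pfn)	-- the array slot */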
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __free_page_cgroup(unsigned long pfn)
+{
+ struct mem_section *ms;
+ struct page_cgroup *base;
+
+ ms = __pfn_to_section(pfn);
+ if (!ms || !ms->page_cgroup)
+ return;
+ base = ms->page_cgroup + pfn;
+ ms->page_cgroup = NULL;
+ if (is_vmalloc_addr(base))
+ vfree(base);
+ else
+ kfree(base);
+}
+
+int online_page_cgroup(unsigned long start_pfn,
+ unsigned long nr_pages,
+ int nid)
+{
+ unsigned long start, end, pfn;
+ int fail = 0;
+
+ start = start_pfn & (PAGES_PER_SECTION - 1);
+ end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
+
+ for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
+ if (!pfn_present(pfn))
+ continue;
+ fail = init_section_page_cgroup(pfn);
+ }
+ if (!fail)
+ return 0;
+
+ /* rollback */
+ for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
+ __free_page_cgroup(pfn);
+
+ return -ENOMEM;
+}
+
+int offline_page_cgroup(unsigned long start_pfn,
+ unsigned long nr_pages, int nid)
+{
+ unsigned long start, end, pfn;
+
+ start = start_pfn & (PAGES_PER_SECTION - 1);
+ end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
+
+ for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
+ __free_page_cgroup(pfn);
+ return 0;
+
+}
+
+static int page_cgroup_callback(struct notifier_block *self,
+ unsigned long action, void *arg)
+{
+ struct memory_notify *mn = arg;
+ int ret = 0;
+ switch (action) {
+ case MEM_GOING_ONLINE:
+ ret = online_page_cgroup(mn->start_pfn,
+ mn->nr_pages, mn->status_change_nid);
+ break;
+ case MEM_CANCEL_ONLINE:
+ case MEM_OFFLINE:
+ offline_page_cgroup(mn->start_pfn,
+ mn->nr_pages, mn->status_change_nid);
+ break;
+ case MEM_GOING_OFFLINE:
+ break;
+ case MEM_ONLINE:
+ case MEM_CANCEL_OFFLINE:
+ break;
+ }
+ ret = notifier_from_errno(ret);
+ return ret;
+}
+
+#endif
+
+void __init page_cgroup_init(void)
+{
+ unsigned long pfn;
+ int fail = 0;
+
+ for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
+ if (!pfn_present(pfn))
+ continue;
+ fail = init_section_page_cgroup(pfn);
+ }
+ if (fail) {
+ printk(KERN_CRIT "try cgroup_disable=memory boot option\n");
+ panic("Out of memory");
+ } else {
+ hotplug_memory_notifier(page_cgroup_callback, 0);
+ }
+ printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
+ printk(KERN_INFO "please try cgroup_disable=memory option if you don't"
+ " want\n");
+}
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+ return;
+}
+
+#endif
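
The total_usage these messages report amounts to one struct page_cgroup per
physical page. With the structure at roughly 40 bytes on 64-bit (an unsigned
long of flags, two pointers, and a list_head, per this series'
<linux/page_cgroup.h>), the overhead is about 1% of RAM, which is why both
boot messages advertise the opt-out:

	4 GB / 4 KiB pages = 1,048,576 pages
	1,048,576 * 40 B  ~= 40 MB	/* ~1% of RAM */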
diff --git a/mm/readahead.c b/mm/readahead.c
index 6cbd9a72fde..bec83c15a78 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -229,7 +229,7 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
*/
unsigned long max_sane_readahead(unsigned long nr)
{
- return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
+ return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
+ node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
}
diff --git a/mm/rmap.c b/mm/rmap.c
index 0383acfcb06..10993942d6c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -53,9 +53,47 @@
#include <asm/tlbflush.h>
-struct kmem_cache *anon_vma_cachep;
+#include "internal.h"
-/* This must be called under the mmap_sem. */
+static struct kmem_cache *anon_vma_cachep;
+
+static inline struct anon_vma *anon_vma_alloc(void)
+{
+ return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
+}
+
+static inline void anon_vma_free(struct anon_vma *anon_vma)
+{
+ kmem_cache_free(anon_vma_cachep, anon_vma);
+}
+
+/**
+ * anon_vma_prepare - attach an anon_vma to a memory region
+ * @vma: the memory region in question
+ *
+ * This makes sure the memory mapping described by 'vma' has
+ * an 'anon_vma' attached to it, so that we can associate the
+ * anonymous pages mapped into it with that anon_vma.
+ *
+ * The common case will be that we already have one, but if
+ * not we either need to find an adjacent mapping that we
+ * can re-use the anon_vma from (very common when the only
+ * reason for splitting a vma has been mprotect()), or we
+ * allocate a new one.
+ *
+ * Anon-vma allocations are very subtle, because we may have
+ * optimistically looked up an anon_vma in page_lock_anon_vma()
+ * and that may actually touch the spinlock even in the newly
+ * allocated vma (it depends on RCU to make sure that the
+ * anon_vma isn't actually destroyed).
+ *
+ * As a result, we need to do proper anon_vma locking even
+ * for the new allocation. At the same time, we do not want
+ * to do any locking for the common case of already having
+ * an anon_vma.
+ *
+ * This must be called with the mmap_sem held for reading.
+ */
int anon_vma_prepare(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
@@ -63,20 +101,17 @@ int anon_vma_prepare(struct vm_area_struct *vma)
might_sleep();
if (unlikely(!anon_vma)) {
struct mm_struct *mm = vma->vm_mm;
- struct anon_vma *allocated, *locked;
+ struct anon_vma *allocated;
anon_vma = find_mergeable_anon_vma(vma);
- if (anon_vma) {
- allocated = NULL;
- locked = anon_vma;
- spin_lock(&locked->lock);
- } else {
+ allocated = NULL;
+ if (!anon_vma) {
anon_vma = anon_vma_alloc();
if (unlikely(!anon_vma))
return -ENOMEM;
allocated = anon_vma;
- locked = NULL;
}
+ spin_lock(&anon_vma->lock);
/* page_table_lock to protect against threads */
spin_lock(&mm->page_table_lock);
@@ -87,8 +122,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
}
spin_unlock(&mm->page_table_lock);
- if (locked)
- spin_unlock(&locked->lock);
+ spin_unlock(&anon_vma->lock);
if (unlikely(allocated))
anon_vma_free(allocated);
}
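
With both paths now taking anon_vma->lock, the commit step under
page_table_lock decides the race: whichever thread installs vma->anon_vma
first wins, and a loser frees its speculative allocation. In outline
(matching the elided middle of the function):

	spin_lock(&anon_vma->lock);
	spin_lock(&mm->page_table_lock);
	if (likely(!vma->anon_vma)) {		/* still unset: we win */
		vma->anon_vma = anon_vma;
		allocated = NULL;		/* consumed, don't free */
	}
	spin_unlock(&mm->page_table_lock);
	spin_unlock(&anon_vma->lock);
	if (unlikely(allocated))		/* lost the race */
		anon_vma_free(allocated);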
@@ -157,7 +191,7 @@ void __init anon_vma_init(void)
* Getting a lock on a stable anon_vma from a page off the LRU is
 * tricky: page_lock_anon_vma relies on RCU to guard against the races.
*/
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
{
struct anon_vma *anon_vma;
unsigned long anon_mapping;
@@ -177,7 +211,7 @@ out:
return NULL;
}
-static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
spin_unlock(&anon_vma->lock);
rcu_read_unlock();
@@ -268,6 +302,32 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
return NULL;
}
+/**
+ * page_mapped_in_vma - check whether a page is really mapped in a VMA
+ * @page: the page to test
+ * @vma: the VMA to test
+ *
+ * Returns 1 if the page is mapped into the page tables of the VMA, 0
+ * if the page is not mapped into the page tables of this VMA. Only
+ * valid for normal file or anonymous VMAs.
+ */
+static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+{
+ unsigned long address;
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ address = vma_address(page, vma);
+ if (address == -EFAULT) /* out of vma range */
+ return 0;
+ pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
+ if (!pte) /* the page is not in this mm */
+ return 0;
+ pte_unmap_unlock(pte, ptl);
+
+ return 1;
+}
+
/*
* Subfunctions of page_referenced: page_referenced_one called
* repeatedly from either page_referenced_anon or page_referenced_file.
@@ -289,10 +349,17 @@ static int page_referenced_one(struct page *page,
if (!pte)
goto out;
+ /*
+ * Don't want to elevate referenced for an mlocked page that gets this
+ * far, so that it progresses to try_to_unmap and is moved to the
+ * unevictable list.
+ */
if (vma->vm_flags & VM_LOCKED) {
- referenced++;
*mapcount = 1; /* break early from loop */
- } else if (ptep_clear_flush_young_notify(vma, address, pte))
+ goto out_unmap;
+ }
+
+ if (ptep_clear_flush_young_notify(vma, address, pte))
referenced++;
/* Pretend the page is referenced if the task has the
@@ -301,6 +368,7 @@ static int page_referenced_one(struct page *page,
rwsem_is_locked(&mm->mmap_sem))
referenced++;
+out_unmap:
(*mapcount)--;
pte_unmap_unlock(pte, ptl);
out:
@@ -390,11 +458,6 @@ static int page_referenced_file(struct page *page,
*/
if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
continue;
- if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
- == (VM_LOCKED|VM_MAYSHARE)) {
- referenced++;
- break;
- }
referenced += page_referenced_one(page, vma, &mapcount);
if (!mapcount)
break;
@@ -674,8 +737,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
page_clear_dirty(page);
set_page_dirty(page);
}
-
- mem_cgroup_uncharge_page(page);
+ if (PageAnon(page))
+ mem_cgroup_uncharge_page(page);
__dec_zone_page_state(page,
PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
/*
@@ -717,11 +780,16 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* If it's recently referenced (perhaps page_referenced
* skipped over this mm) then we should reactivate it.
*/
- if (!migration && ((vma->vm_flags & VM_LOCKED) ||
- (ptep_clear_flush_young_notify(vma, address, pte)))) {
- ret = SWAP_FAIL;
- goto out_unmap;
- }
+ if (!migration) {
+ if (vma->vm_flags & VM_LOCKED) {
+ ret = SWAP_MLOCK;
+ goto out_unmap;
+ }
+ if (ptep_clear_flush_young_notify(vma, address, pte)) {
+ ret = SWAP_FAIL;
+ goto out_unmap;
+ }
+ }
/* Nuke the page table entry. */
flush_cache_page(vma, address, page_to_pfn(page));
@@ -802,12 +870,17 @@ out:
* For very sparsely populated VMAs this is a little inefficient - chances are
* there won't be many ptes located within the scan cluster. In this case
* maybe we could scan further - to the end of the pte page, perhaps.
+ *
+ * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can
+ * acquire it without blocking. If vma locked, mlock the pages in the cluster,
+ * rather than unmapping them. If we encounter the "check_page" that vmscan is
+ * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
*/
#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
#define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
-static void try_to_unmap_cluster(unsigned long cursor,
- unsigned int *mapcount, struct vm_area_struct *vma)
+static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
+ struct vm_area_struct *vma, struct page *check_page)
{
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
@@ -819,6 +892,8 @@ static void try_to_unmap_cluster(unsigned long cursor,
struct page *page;
unsigned long address;
unsigned long end;
+ int ret = SWAP_AGAIN;
+ int locked_vma = 0;
address = (vma->vm_start + cursor) & CLUSTER_MASK;
end = address + CLUSTER_SIZE;
@@ -829,15 +904,26 @@ static void try_to_unmap_cluster(unsigned long cursor,
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
- return;
+ return ret;
pud = pud_offset(pgd, address);
if (!pud_present(*pud))
- return;
+ return ret;
pmd = pmd_offset(pud, address);
if (!pmd_present(*pmd))
- return;
+ return ret;
+
+ /*
+ * MLOCK_PAGES => feature is configured.
+ * If we can acquire the mmap_sem for read and the vma is VM_LOCKED,
+ * keep the sem while scanning the cluster for pages to mlock.
+ */
+ if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) {
+ locked_vma = (vma->vm_flags & VM_LOCKED);
+ if (!locked_vma)
+ up_read(&vma->vm_mm->mmap_sem); /* don't need it */
+ }
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
@@ -850,6 +936,13 @@ static void try_to_unmap_cluster(unsigned long cursor,
page = vm_normal_page(vma, address, *pte);
BUG_ON(!page || PageAnon(page));
+ if (locked_vma) {
+ mlock_vma_page(page); /* no-op if already mlocked */
+ if (page == check_page)
+ ret = SWAP_MLOCK;
+ continue; /* don't unmap */
+ }
+
if (ptep_clear_flush_young_notify(vma, address, pte))
continue;
@@ -871,39 +964,104 @@ static void try_to_unmap_cluster(unsigned long cursor,
(*mapcount)--;
}
pte_unmap_unlock(pte - 1, ptl);
+ if (locked_vma)
+ up_read(&vma->vm_mm->mmap_sem);
+ return ret;
}
-static int try_to_unmap_anon(struct page *page, int migration)
+/*
+ * common handling for pages mapped in VM_LOCKED vmas
+ */
+static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
+{
+ int mlocked = 0;
+
+ if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
+ if (vma->vm_flags & VM_LOCKED) {
+ mlock_vma_page(page);
+ mlocked++; /* really mlocked the page */
+ }
+ up_read(&vma->vm_mm->mmap_sem);
+ }
+ return mlocked;
+}
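try_to_mlock_page() only acts when the read side of mmap_sem can be taken without blocking. The same idiom with a POSIX rwlock, as an illustrative userspace sketch (none of these names are kernel API):

#include <pthread.h>

static pthread_rwlock_t map_sem = PTHREAD_RWLOCK_INITIALIZER;
static int vm_locked;                  /* stands in for VM_LOCKED */

static int try_note_mlocked(void)
{
	int mlocked = 0;

	if (pthread_rwlock_tryrdlock(&map_sem) == 0) {
		if (vm_locked)
			mlocked = 1;   /* would mlock the page here */
		pthread_rwlock_unlock(&map_sem);
	}
	return mlocked;                /* 0: contended, or vma not locked */
}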
+
+/**
+ * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
+ * rmap method
+ * @page: the page to unmap/unlock
+ * @unlock: request for unlock rather than unmap [unlikely]
+ * @migration: unmapping for migration - ignored if @unlock
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the anon_vma struct it points to.
+ *
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * anonymous pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write. So, we won't recheck
+ * vm_flags for that VMA. That should be OK, because that vma shouldn't be
+ * VM_LOCKED.
+ */
+static int try_to_unmap_anon(struct page *page, int unlock, int migration)
{
struct anon_vma *anon_vma;
struct vm_area_struct *vma;
+ unsigned int mlocked = 0;
int ret = SWAP_AGAIN;
+ if (MLOCK_PAGES && unlikely(unlock))
+ ret = SWAP_SUCCESS; /* default for try_to_munlock() */
+
anon_vma = page_lock_anon_vma(page);
if (!anon_vma)
return ret;
list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
- ret = try_to_unmap_one(page, vma, migration);
- if (ret == SWAP_FAIL || !page_mapped(page))
- break;
+ if (MLOCK_PAGES && unlikely(unlock)) {
+ if (!((vma->vm_flags & VM_LOCKED) &&
+ page_mapped_in_vma(page, vma)))
+ continue; /* must visit all unlocked vmas */
+ ret = SWAP_MLOCK; /* saw at least one mlocked vma */
+ } else {
+ ret = try_to_unmap_one(page, vma, migration);
+ if (ret == SWAP_FAIL || !page_mapped(page))
+ break;
+ }
+ if (ret == SWAP_MLOCK) {
+ mlocked = try_to_mlock_page(page, vma);
+ if (mlocked)
+ break; /* stop if actually mlocked page */
+ }
}
page_unlock_anon_vma(anon_vma);
+
+ if (mlocked)
+ ret = SWAP_MLOCK; /* actually mlocked the page */
+ else if (ret == SWAP_MLOCK)
+ ret = SWAP_AGAIN; /* saw VM_LOCKED vma */
+
return ret;
}
/**
- * try_to_unmap_file - unmap file page using the object-based rmap method
- * @page: the page to unmap
- * @migration: migration flag
+ * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
+ * @page: the page to unmap/unlock
+ * @unlock: request for unlock rather than unmap [unlikely]
+ * @migration: unmapping for migration - ignored if @unlock
*
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the address_space struct it points to.
*
- * This function is only called from try_to_unmap for object-based pages.
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * object-based pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write. So, we won't recheck
+ * vm_flags for that VMA. That should be OK, because that vma shouldn't be
+ * VM_LOCKED.
*/
-static int try_to_unmap_file(struct page *page, int migration)
+static int try_to_unmap_file(struct page *page, int unlock, int migration)
{
struct address_space *mapping = page->mapping;
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -914,20 +1072,44 @@ static int try_to_unmap_file(struct page *page, int migration)
unsigned long max_nl_cursor = 0;
unsigned long max_nl_size = 0;
unsigned int mapcount;
+ unsigned int mlocked = 0;
+
+ if (MLOCK_PAGES && unlikely(unlock))
+ ret = SWAP_SUCCESS; /* default for try_to_munlock() */
spin_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
- ret = try_to_unmap_one(page, vma, migration);
- if (ret == SWAP_FAIL || !page_mapped(page))
- goto out;
+ if (MLOCK_PAGES && unlikely(unlock)) {
+ if (!(vma->vm_flags & VM_LOCKED))
+ continue; /* must visit all vmas */
+ ret = SWAP_MLOCK;
+ } else {
+ ret = try_to_unmap_one(page, vma, migration);
+ if (ret == SWAP_FAIL || !page_mapped(page))
+ goto out;
+ }
+ if (ret == SWAP_MLOCK) {
+ mlocked = try_to_mlock_page(page, vma);
+ if (mlocked)
+ break; /* stop if actually mlocked page */
+ }
}
+ if (mlocked)
+ goto out;
+
if (list_empty(&mapping->i_mmap_nonlinear))
goto out;
list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
shared.vm_set.list) {
- if ((vma->vm_flags & VM_LOCKED) && !migration)
+ if (MLOCK_PAGES && unlikely(unlock)) {
+ if (!(vma->vm_flags & VM_LOCKED))
+ continue; /* must visit all vmas */
+ ret = SWAP_MLOCK; /* leave mlocked == 0 */
+ goto out; /* no need to look further */
+ }
+ if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED))
continue;
cursor = (unsigned long) vma->vm_private_data;
if (cursor > max_nl_cursor)
@@ -937,7 +1119,7 @@ static int try_to_unmap_file(struct page *page, int migration)
max_nl_size = cursor;
}
- if (max_nl_size == 0) { /* any nonlinears locked or reserved */
+ if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
ret = SWAP_FAIL;
goto out;
}
@@ -961,12 +1143,16 @@ static int try_to_unmap_file(struct page *page, int migration)
do {
list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
shared.vm_set.list) {
- if ((vma->vm_flags & VM_LOCKED) && !migration)
+ if (!MLOCK_PAGES && !migration &&
+ (vma->vm_flags & VM_LOCKED))
continue;
cursor = (unsigned long) vma->vm_private_data;
while ( cursor < max_nl_cursor &&
cursor < vma->vm_end - vma->vm_start) {
- try_to_unmap_cluster(cursor, &mapcount, vma);
+ ret = try_to_unmap_cluster(cursor, &mapcount,
+ vma, page);
+ if (ret == SWAP_MLOCK)
+ mlocked = 2; /* to return below */
cursor += CLUSTER_SIZE;
vma->vm_private_data = (void *) cursor;
if ((int)mapcount <= 0)
@@ -987,6 +1173,10 @@ static int try_to_unmap_file(struct page *page, int migration)
vma->vm_private_data = NULL;
out:
spin_unlock(&mapping->i_mmap_lock);
+ if (mlocked)
+ ret = SWAP_MLOCK; /* actually mlocked the page */
+ else if (ret == SWAP_MLOCK)
+ ret = SWAP_AGAIN; /* saw VM_LOCKED vma */
return ret;
}
@@ -1002,6 +1192,7 @@ out:
* SWAP_SUCCESS - we succeeded in removing all mappings
* SWAP_AGAIN - we missed a mapping, try again later
* SWAP_FAIL - the page is unswappable
+ * SWAP_MLOCK - page is mlocked.
*/
int try_to_unmap(struct page *page, int migration)
{
@@ -1010,12 +1201,36 @@ int try_to_unmap(struct page *page, int migration)
BUG_ON(!PageLocked(page));
if (PageAnon(page))
- ret = try_to_unmap_anon(page, migration);
+ ret = try_to_unmap_anon(page, 0, migration);
else
- ret = try_to_unmap_file(page, migration);
-
- if (!page_mapped(page))
+ ret = try_to_unmap_file(page, 0, migration);
+ if (ret != SWAP_MLOCK && !page_mapped(page))
ret = SWAP_SUCCESS;
return ret;
}
+#ifdef CONFIG_UNEVICTABLE_LRU
+/**
+ * try_to_munlock - try to munlock a page
+ * @page: the page to be munlocked
+ *
+ * Called from munlock code. Checks all of the VMAs mapping the page
+ * to make sure nobody else has this page mlocked. The page will be
+ * returned with PG_mlocked cleared if no other vmas have it mlocked.
+ *
+ * Return values are:
+ *
+ * SWAP_SUCCESS - no vma holds the page mlocked.
+ * SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem
+ * SWAP_MLOCK - page is now mlocked.
+ */
+int try_to_munlock(struct page *page)
+{
+ VM_BUG_ON(!PageLocked(page) || PageLRU(page));
+
+ if (PageAnon(page))
+ return try_to_unmap_anon(page, 1, 0);
+ else
+ return try_to_unmap_file(page, 1, 0);
+}
+#endif
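A hedged sketch of how a munlock path might consume these return values (the real caller lives in mm/mlock.c and is not part of this hunk; only try_to_munlock() and the SWAP_* constants documented above are assumed):

	int ret = try_to_munlock(page);  /* page locked, isolated from LRU */

	if (ret == SWAP_MLOCK)
		;	/* another VM_LOCKED vma still maps it: stays unevictable */
	else if (ret == SWAP_AGAIN)
		;	/* some mmap_sem was contended: vmscan can retry later */
	else
		;	/* SWAP_SUCCESS: no mlocking vma left, page is evictable */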
diff --git a/mm/shmem.c b/mm/shmem.c
index d87958a5f03..d38d7e61fcd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -199,7 +199,7 @@ static struct vm_operations_struct shmem_vm_ops;
static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
.ra_pages = 0, /* No readahead */
- .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
.unplug_io_fn = default_unplug_io_fn,
};
@@ -1367,6 +1367,7 @@ repeat:
error = -ENOMEM;
goto failed;
}
+ SetPageSwapBacked(filepage);
/* Precharge page while we can wait, compensate after */
error = mem_cgroup_cache_charge(filepage, current->mm,
@@ -1476,12 +1477,16 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
if (!user_shm_lock(inode->i_size, user))
goto out_nomem;
info->flags |= VM_LOCKED;
+ mapping_set_unevictable(file->f_mapping);
}
if (!lock && (info->flags & VM_LOCKED) && user) {
user_shm_unlock(inode->i_size, user);
info->flags &= ~VM_LOCKED;
+ mapping_clear_unevictable(file->f_mapping);
+ scan_mapping_unevictable_pages(file->f_mapping);
}
retval = 0;
+
out_nomem:
spin_unlock(&info->lock);
return retval;
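shmem_lock() above is reached through the SysV shared memory path; a minimal userspace program that exercises both branches via shmctl() (SHM_LOCK may require CAP_IPC_LOCK or a sufficient RLIMIT_MEMLOCK):

#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
	char *p;

	if (id < 0)
		return 1;
	p = shmat(id, NULL, 0);
	if (p == (void *)-1)
		return 1;
	memset(p, 0xaa, 4096);          /* fault the page in */

	shmctl(id, SHM_LOCK, NULL);     /* mapping_set_unevictable() path */
	shmctl(id, SHM_UNLOCK, NULL);   /* clear + scan_mapping_unevictable_pages() */

	shmdt(p);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}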
diff --git a/mm/swap.c b/mm/swap.c
index 9e0cb311807..2152e48a7b8 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -31,11 +31,12 @@
#include <linux/backing-dev.h>
#include <linux/memcontrol.h>
+#include "internal.h"
+
/* How many pages do we try to swap or page in/out together? */
int page_cluster;
-static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs);
+static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
/*
@@ -116,8 +117,9 @@ static void pagevec_move_tail(struct pagevec *pvec)
zone = pagezone;
spin_lock(&zone->lru_lock);
}
- if (PageLRU(page) && !PageActive(page)) {
- list_move_tail(&page->lru, &zone->inactive_list);
+ if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+ int lru = page_is_file_cache(page);
+ list_move_tail(&page->lru, &zone->lru[lru].list);
pgmoved++;
}
}
@@ -136,7 +138,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
void rotate_reclaimable_page(struct page *page)
{
if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
- PageLRU(page)) {
+ !PageUnevictable(page) && PageLRU(page)) {
struct pagevec *pvec;
unsigned long flags;
@@ -157,12 +159,19 @@ void activate_page(struct page *page)
struct zone *zone = page_zone(page);
spin_lock_irq(&zone->lru_lock);
- if (PageLRU(page) && !PageActive(page)) {
- del_page_from_inactive_list(zone, page);
+ if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+ int file = page_is_file_cache(page);
+ int lru = LRU_BASE + file;
+ del_page_from_lru_list(zone, page, lru);
+
SetPageActive(page);
- add_page_to_active_list(zone, page);
+ lru += LRU_ACTIVE;
+ add_page_to_lru_list(zone, page, lru);
__count_vm_event(PGACTIVATE);
- mem_cgroup_move_lists(page, true);
+ mem_cgroup_move_lists(page, lru);
+
+ zone->recent_rotated[!!file]++;
+ zone->recent_scanned[!!file]++;
}
spin_unlock_irq(&zone->lru_lock);
}
@@ -176,7 +185,8 @@ void activate_page(struct page *page)
*/
void mark_page_accessed(struct page *page)
{
- if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
+ if (!PageActive(page) && !PageUnevictable(page) &&
+ PageReferenced(page) && PageLRU(page)) {
activate_page(page);
ClearPageReferenced(page);
} else if (!PageReferenced(page)) {
@@ -186,28 +196,73 @@ void mark_page_accessed(struct page *page)
EXPORT_SYMBOL(mark_page_accessed);
-/**
- * lru_cache_add: add a page to the page lists
- * @page: the page to add
- */
-void lru_cache_add(struct page *page)
+void __lru_cache_add(struct page *page, enum lru_list lru)
{
- struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
+ struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
page_cache_get(page);
if (!pagevec_add(pvec, page))
- __pagevec_lru_add(pvec);
+ ____pagevec_lru_add(pvec, lru);
put_cpu_var(lru_add_pvecs);
}
-void lru_cache_add_active(struct page *page)
+/**
+ * lru_cache_add_lru - add a page to a page list
+ * @page: the page to be added to the LRU.
+ * @lru: the LRU list to which the page is added.
+ */
+void lru_cache_add_lru(struct page *page, enum lru_list lru)
{
- struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
+ if (PageActive(page)) {
+ VM_BUG_ON(PageUnevictable(page));
+ ClearPageActive(page);
+ } else if (PageUnevictable(page)) {
+ VM_BUG_ON(PageActive(page));
+ ClearPageUnevictable(page);
+ }
- page_cache_get(page);
- if (!pagevec_add(pvec, page))
- __pagevec_lru_add_active(pvec);
- put_cpu_var(lru_add_active_pvecs);
+ VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
+ __lru_cache_add(page, lru);
+}
+
+/**
+ * add_page_to_unevictable_list - add a page to the unevictable list
+ * @page: the page to be added to the unevictable list
+ *
+ * Add page directly to its zone's unevictable list. To avoid races with
+ * tasks that might be making the page evictable, through eg. munlock,
+ * munmap or exit, while it's not on the lru, we want to add the page
+ * while it's locked or otherwise "invisible" to other tasks. This is
+ * difficult to do when using the pagevec cache, so bypass that.
+ */
+void add_page_to_unevictable_list(struct page *page)
+{
+ struct zone *zone = page_zone(page);
+
+ spin_lock_irq(&zone->lru_lock);
+ SetPageUnevictable(page);
+ SetPageLRU(page);
+ add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
+ spin_unlock_irq(&zone->lru_lock);
+}
+
+/**
+ * lru_cache_add_active_or_unevictable
+ * @page: the page to be added to LRU
+ * @vma: vma in which page is mapped for determining reclaimability
+ *
+ * place @page on active or unevictable LRU list, depending on
+ * page_evictable(). Note that if the page is not evictable,
+ * it goes directly back onto its zone's unevictable list. It does
+ * NOT use a per cpu pagevec.
+ */
+void lru_cache_add_active_or_unevictable(struct page *page,
+ struct vm_area_struct *vma)
+{
+ if (page_evictable(page, vma))
+ lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page));
+ else
+ add_page_to_unevictable_list(page);
}
/*
@@ -217,15 +272,15 @@ void lru_cache_add_active(struct page *page)
*/
static void drain_cpu_pagevecs(int cpu)
{
+ struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
struct pagevec *pvec;
+ int lru;
- pvec = &per_cpu(lru_add_pvecs, cpu);
- if (pagevec_count(pvec))
- __pagevec_lru_add(pvec);
-
- pvec = &per_cpu(lru_add_active_pvecs, cpu);
- if (pagevec_count(pvec))
- __pagevec_lru_add_active(pvec);
+ for_each_lru(lru) {
+ pvec = &pvecs[lru - LRU_BASE];
+ if (pagevec_count(pvec))
+ ____pagevec_lru_add(pvec, lru);
+ }
pvec = &per_cpu(lru_rotate_pvecs, cpu);
if (pagevec_count(pvec)) {
@@ -244,7 +299,7 @@ void lru_add_drain(void)
put_cpu();
}
-#ifdef CONFIG_NUMA
+#if defined(CONFIG_NUMA) || defined(CONFIG_UNEVICTABLE_LRU)
static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
lru_add_drain();
@@ -308,6 +363,7 @@ void release_pages(struct page **pages, int nr, int cold)
if (PageLRU(page)) {
struct zone *pagezone = page_zone(page);
+
if (pagezone != zone) {
if (zone)
spin_unlock_irqrestore(&zone->lru_lock,
@@ -380,10 +436,11 @@ void __pagevec_release_nonlru(struct pagevec *pvec)
* Add the passed pages to the LRU, then drop the caller's refcount
* on them. Reinitialises the caller's pagevec.
*/
-void __pagevec_lru_add(struct pagevec *pvec)
+void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
{
int i;
struct zone *zone = NULL;
+ VM_BUG_ON(is_unevictable_lru(lru));
for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
@@ -395,9 +452,13 @@ void __pagevec_lru_add(struct pagevec *pvec)
zone = pagezone;
spin_lock_irq(&zone->lru_lock);
}
+ VM_BUG_ON(PageActive(page));
+ VM_BUG_ON(PageUnevictable(page));
VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
- add_page_to_inactive_list(zone, page);
+ if (is_active_lru(lru))
+ SetPageActive(page);
+ add_page_to_lru_list(zone, page, lru);
}
if (zone)
spin_unlock_irq(&zone->lru_lock);
@@ -405,48 +466,45 @@ void __pagevec_lru_add(struct pagevec *pvec)
pagevec_reinit(pvec);
}
-EXPORT_SYMBOL(__pagevec_lru_add);
+EXPORT_SYMBOL(____pagevec_lru_add);
-void __pagevec_lru_add_active(struct pagevec *pvec)
+/*
+ * Try to drop buffers from the pages in a pagevec
+ */
+void pagevec_strip(struct pagevec *pvec)
{
int i;
- struct zone *zone = NULL;
for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
- struct zone *pagezone = page_zone(page);
- if (pagezone != zone) {
- if (zone)
- spin_unlock_irq(&zone->lru_lock);
- zone = pagezone;
- spin_lock_irq(&zone->lru_lock);
+ if (PagePrivate(page) && trylock_page(page)) {
+ if (PagePrivate(page))
+ try_to_release_page(page, 0);
+ unlock_page(page);
}
- VM_BUG_ON(PageLRU(page));
- SetPageLRU(page);
- VM_BUG_ON(PageActive(page));
- SetPageActive(page);
- add_page_to_active_list(zone, page);
}
- if (zone)
- spin_unlock_irq(&zone->lru_lock);
- release_pages(pvec->pages, pvec->nr, pvec->cold);
- pagevec_reinit(pvec);
}
-/*
- * Try to drop buffers from the pages in a pagevec
+/**
+ * pagevec_swap_free - try to free swap space from the pages in a pagevec
+ * @pvec: pagevec with swapcache pages to free the swap space of
+ *
+ * The caller needs to hold an extra reference to each page and
+ * not hold the page lock on the pages. This function uses a
+ * trylock on the page lock so it may not always free the swap
+ * space associated with a page.
*/
-void pagevec_strip(struct pagevec *pvec)
+void pagevec_swap_free(struct pagevec *pvec)
{
int i;
for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
- if (PagePrivate(page) && trylock_page(page)) {
- if (PagePrivate(page))
- try_to_release_page(page, 0);
+ if (PageSwapCache(page) && trylock_page(page)) {
+ if (PageSwapCache(page))
+ remove_exclusive_swap_page_ref(page);
unlock_page(page);
}
}
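Both pagevec_strip() and the new pagevec_swap_free() use the same check / trylock / recheck shape, since the condition can change while racing for the page lock. Reduced to its skeleton (COND and do_work are placeholders, not kernel symbols):

	if (COND(page) && trylock_page(page)) {
		/* COND may have changed before we got the lock */
		if (COND(page))
			do_work(page);
		unlock_page(page);
	}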
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 797c3831cbe..3353c9029ce 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -33,7 +33,7 @@ static const struct address_space_operations swap_aops = {
};
static struct backing_dev_info swap_backing_dev_info = {
- .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
.unplug_io_fn = swap_unplug_io_fn,
};
@@ -75,6 +75,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
BUG_ON(!PageLocked(page));
BUG_ON(PageSwapCache(page));
BUG_ON(PagePrivate(page));
+ BUG_ON(!PageSwapBacked(page));
error = radix_tree_preload(gfp_mask);
if (!error) {
page_cache_get(page);
@@ -302,17 +303,19 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* re-using the just freed swap entry for an existing page.
* May fail (-ENOMEM) if radix-tree node allocation failed.
*/
- set_page_locked(new_page);
+ __set_page_locked(new_page);
+ SetPageSwapBacked(new_page);
err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
if (likely(!err)) {
/*
* Initiate read into locked page and return.
*/
- lru_cache_add_active(new_page);
+ lru_cache_add_anon(new_page);
swap_readpage(NULL, new_page);
return new_page;
}
- clear_page_locked(new_page);
+ ClearPageSwapBacked(new_page);
+ __clear_page_locked(new_page);
swap_free(entry);
} while (err != -ENOMEM);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1e330f2998f..90cb67a5417 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -344,7 +344,7 @@ int can_share_swap_page(struct page *page)
* Work out if there are any other processes sharing this
* swap cache page. Free it if you can. Return success.
*/
-int remove_exclusive_swap_page(struct page *page)
+static int remove_exclusive_swap_page_count(struct page *page, int count)
{
int retval;
struct swap_info_struct * p;
@@ -357,7 +357,7 @@ int remove_exclusive_swap_page(struct page *page)
return 0;
if (PageWriteback(page))
return 0;
- if (page_count(page) != 2) /* 2: us + cache */
+ if (page_count(page) != count) /* us + cache + ptes */
return 0;
entry.val = page_private(page);
@@ -370,7 +370,7 @@ int remove_exclusive_swap_page(struct page *page)
if (p->swap_map[swp_offset(entry)] == 1) {
/* Recheck the page count with the swapcache lock held.. */
spin_lock_irq(&swapper_space.tree_lock);
- if ((page_count(page) == 2) && !PageWriteback(page)) {
+ if ((page_count(page) == count) && !PageWriteback(page)) {
__delete_from_swap_cache(page);
SetPageDirty(page);
retval = 1;
@@ -388,6 +388,25 @@ int remove_exclusive_swap_page(struct page *page)
}
/*
+ * Most of the time the page should have two references: one for the
+ * process and one for the swap cache.
+ */
+int remove_exclusive_swap_page(struct page *page)
+{
+ return remove_exclusive_swap_page_count(page, 2);
+}
+
+/*
+ * The pageout code holds an extra reference to the page. With that
+ * reference, the count to test against is 2 for a page that is only
+ * in the swap cache, plus 1 for each process that maps the page.
+ */
+int remove_exclusive_swap_page_ref(struct page *page)
+{
+ return remove_exclusive_swap_page_count(page, 2 + page_mapcount(page));
+}
+
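As a worked example of the arithmetic above: a swapcache page mapped by three processes is referenced by the swap cache (1), by the pageout code's extra reference (1), and by the three ptes, so remove_exclusive_swap_page_ref() tests page_count(page) against 2 + page_mapcount(page) = 5.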
+/*
* Free the swap entry like above, but also try to
* free the page cache entry if it is the last user.
*/
@@ -403,7 +422,7 @@ void free_swap_and_cache(swp_entry_t entry)
if (p) {
if (swap_entry_free(p, swp_offset(entry)) == 1) {
page = find_get_page(&swapper_space, entry.val);
- if (page && unlikely(!trylock_page(page))) {
+ if (page && !trylock_page(page)) {
page_cache_release(page);
page = NULL;
}
diff --git a/mm/truncate.c b/mm/truncate.c
index e83e4b114ef..1229211104f 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -18,6 +18,7 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/buffer_head.h> /* grr. try_to_release_page,
do_invalidatepage */
+#include "internal.h"
/**
@@ -103,6 +104,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
cancel_dirty_page(page, PAGE_CACHE_SIZE);
+ clear_page_mlock(page);
remove_from_page_cache(page);
ClearPageMappedToDisk(page);
page_cache_release(page); /* pagecache ref */
@@ -127,6 +129,7 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
if (PagePrivate(page) && !try_to_release_page(page, 0))
return 0;
+ clear_page_mlock(page);
ret = remove_mapping(mapping, page);
return ret;
@@ -352,6 +355,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
if (PageDirty(page))
goto failed;
+ clear_page_mlock(page);
BUG_ON(PagePrivate(page));
__remove_from_page_cache(page);
spin_unlock_irq(&mapping->tree_lock);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index bba06c41fc5..712ae47af0b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -8,6 +8,7 @@
* Numa awareness, Christoph Lameter, SGI, June 2005
*/
+#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
@@ -18,16 +19,17 @@
#include <linux/debugobjects.h>
#include <linux/vmalloc.h>
#include <linux/kallsyms.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+#include <asm/atomic.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
-DEFINE_RWLOCK(vmlist_lock);
-struct vm_struct *vmlist;
-
-static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
- int node, void *caller);
+/*** Page table manipulation functions ***/
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
@@ -40,8 +42,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
} while (pte++, addr += PAGE_SIZE, addr != end);
}
-static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
- unsigned long end)
+static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
{
pmd_t *pmd;
unsigned long next;
@@ -55,8 +56,7 @@ static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
} while (pmd++, addr = next, addr != end);
}
-static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
- unsigned long end)
+static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
{
pud_t *pud;
unsigned long next;
@@ -70,12 +70,10 @@ static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
} while (pud++, addr = next, addr != end);
}
-void unmap_kernel_range(unsigned long addr, unsigned long size)
+static void vunmap_page_range(unsigned long addr, unsigned long end)
{
pgd_t *pgd;
unsigned long next;
- unsigned long start = addr;
- unsigned long end = addr + size;
BUG_ON(addr >= end);
pgd = pgd_offset_k(addr);
@@ -86,35 +84,36 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
continue;
vunmap_pud_range(pgd, addr, next);
} while (pgd++, addr = next, addr != end);
- flush_tlb_kernel_range(start, end);
-}
-
-static void unmap_vm_area(struct vm_struct *area)
-{
- unmap_kernel_range((unsigned long)area->addr, area->size);
}
static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page ***pages)
+ unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
pte_t *pte;
+ /*
+ * nr is a running index into the array which helps higher level
+ * callers keep track of where we're up to.
+ */
+
pte = pte_alloc_kernel(pmd, addr);
if (!pte)
return -ENOMEM;
do {
- struct page *page = **pages;
- WARN_ON(!pte_none(*pte));
- if (!page)
+ struct page *page = pages[*nr];
+
+ if (WARN_ON(!pte_none(*pte)))
+ return -EBUSY;
+ if (WARN_ON(!page))
return -ENOMEM;
set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
- (*pages)++;
+ (*nr)++;
} while (pte++, addr += PAGE_SIZE, addr != end);
return 0;
}
-static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page ***pages)
+static int vmap_pmd_range(pud_t *pud, unsigned long addr,
+ unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
pmd_t *pmd;
unsigned long next;
@@ -124,14 +123,14 @@ static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
return -ENOMEM;
do {
next = pmd_addr_end(addr, end);
- if (vmap_pte_range(pmd, addr, next, prot, pages))
+ if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
return -ENOMEM;
} while (pmd++, addr = next, addr != end);
return 0;
}
-static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page ***pages)
+static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
pud_t *pud;
unsigned long next;
@@ -141,44 +140,49 @@ static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
return -ENOMEM;
do {
next = pud_addr_end(addr, end);
- if (vmap_pmd_range(pud, addr, next, prot, pages))
+ if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
return -ENOMEM;
} while (pud++, addr = next, addr != end);
return 0;
}
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+/*
+ * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
+ * will have pfns corresponding to the "pages" array.
+ *
+ * I.e. the pte at addr + N*PAGE_SIZE shall point to the pfn of pages[N]
+ */
+static int vmap_page_range(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages)
{
pgd_t *pgd;
unsigned long next;
- unsigned long addr = (unsigned long) area->addr;
- unsigned long end = addr + area->size - PAGE_SIZE;
- int err;
+ int err = 0;
+ int nr = 0;
BUG_ON(addr >= end);
pgd = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
- err = vmap_pud_range(pgd, addr, next, prot, pages);
+ err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
if (err)
break;
} while (pgd++, addr = next, addr != end);
- flush_cache_vmap((unsigned long) area->addr, end);
- return err;
+ flush_cache_vmap(addr, end);
+
+ if (unlikely(err))
+ return err;
+ return nr;
}
-EXPORT_SYMBOL_GPL(map_vm_area);
/*
- * Map a vmalloc()-space virtual address to the physical page.
+ * Walk a vmap address to the struct page it maps.
*/
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
unsigned long addr = (unsigned long) vmalloc_addr;
struct page *page = NULL;
pgd_t *pgd = pgd_offset_k(addr);
- pud_t *pud;
- pmd_t *pmd;
- pte_t *ptep, pte;
/*
* XXX we might need to change this if we add VIRTUAL_BUG_ON for
@@ -188,10 +192,12 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
!is_module_address(addr));
if (!pgd_none(*pgd)) {
- pud = pud_offset(pgd, addr);
+ pud_t *pud = pud_offset(pgd, addr);
if (!pud_none(*pud)) {
- pmd = pmd_offset(pud, addr);
+ pmd_t *pmd = pmd_offset(pud, addr);
if (!pmd_none(*pmd)) {
+ pte_t *ptep, pte;
+
ptep = pte_offset_map(pmd, addr);
pte = *ptep;
if (pte_present(pte))
@@ -213,13 +219,751 @@ unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
}
EXPORT_SYMBOL(vmalloc_to_pfn);
-static struct vm_struct *
-__get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
- unsigned long end, int node, gfp_t gfp_mask, void *caller)
+
+/*** Global kva allocator ***/
+
+#define VM_LAZY_FREE 0x01
+#define VM_LAZY_FREEING 0x02
+#define VM_VM_AREA 0x04
+
+struct vmap_area {
+ unsigned long va_start;
+ unsigned long va_end;
+ unsigned long flags;
+ struct rb_node rb_node; /* address sorted rbtree */
+ struct list_head list; /* address sorted list */
+ struct list_head purge_list; /* "lazy purge" list */
+ void *private;
+ struct rcu_head rcu_head;
+};
+
+static DEFINE_SPINLOCK(vmap_area_lock);
+static struct rb_root vmap_area_root = RB_ROOT;
+static LIST_HEAD(vmap_area_list);
+
+static struct vmap_area *__find_vmap_area(unsigned long addr)
{
- struct vm_struct **p, *tmp, *area;
- unsigned long align = 1;
+ struct rb_node *n = vmap_area_root.rb_node;
+
+ while (n) {
+ struct vmap_area *va;
+
+ va = rb_entry(n, struct vmap_area, rb_node);
+ if (addr < va->va_start)
+ n = n->rb_left;
+ else if (addr > va->va_start)
+ n = n->rb_right;
+ else
+ return va;
+ }
+
+ return NULL;
+}
+
+static void __insert_vmap_area(struct vmap_area *va)
+{
+ struct rb_node **p = &vmap_area_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct rb_node *tmp;
+
+ while (*p) {
+ struct vmap_area *tmp;
+
+ parent = *p;
+ tmp = rb_entry(parent, struct vmap_area, rb_node);
+ if (va->va_start < tmp->va_end)
+ p = &(*p)->rb_left;
+ else if (va->va_end > tmp->va_start)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&va->rb_node, parent, p);
+ rb_insert_color(&va->rb_node, &vmap_area_root);
+
+ /* address-sort this list so it is usable like the vmlist */
+ tmp = rb_prev(&va->rb_node);
+ if (tmp) {
+ struct vmap_area *prev;
+ prev = rb_entry(tmp, struct vmap_area, rb_node);
+ list_add_rcu(&va->list, &prev->list);
+ } else
+ list_add_rcu(&va->list, &vmap_area_list);
+}
+
+static void purge_vmap_area_lazy(void);
+
+/*
+ * Allocate a region of KVA of the specified size and alignment, within the
+ * vstart and vend.
+ */
+static struct vmap_area *alloc_vmap_area(unsigned long size,
+ unsigned long align,
+ unsigned long vstart, unsigned long vend,
+ int node, gfp_t gfp_mask)
+{
+ struct vmap_area *va;
+ struct rb_node *n;
+ unsigned long addr;
+ int purged = 0;
+
+ BUG_ON(size & ~PAGE_MASK);
+
+ addr = ALIGN(vstart, align);
+
+ va = kmalloc_node(sizeof(struct vmap_area),
+ gfp_mask & GFP_RECLAIM_MASK, node);
+ if (unlikely(!va))
+ return ERR_PTR(-ENOMEM);
+
+retry:
+ spin_lock(&vmap_area_lock);
+ /* XXX: could have a last_hole cache */
+ n = vmap_area_root.rb_node;
+ if (n) {
+ struct vmap_area *first = NULL;
+
+ do {
+ struct vmap_area *tmp;
+ tmp = rb_entry(n, struct vmap_area, rb_node);
+ if (tmp->va_end >= addr) {
+ if (!first && tmp->va_start < addr + size)
+ first = tmp;
+ n = n->rb_left;
+ } else {
+ first = tmp;
+ n = n->rb_right;
+ }
+ } while (n);
+
+ if (!first)
+ goto found;
+
+ if (first->va_end < addr) {
+ n = rb_next(&first->rb_node);
+ if (n)
+ first = rb_entry(n, struct vmap_area, rb_node);
+ else
+ goto found;
+ }
+
+ while (addr + size >= first->va_start && addr + size <= vend) {
+ addr = ALIGN(first->va_end + PAGE_SIZE, align);
+
+ n = rb_next(&first->rb_node);
+ if (n)
+ first = rb_entry(n, struct vmap_area, rb_node);
+ else
+ goto found;
+ }
+ }
+found:
+ if (addr + size > vend) {
+ spin_unlock(&vmap_area_lock);
+ if (!purged) {
+ purge_vmap_area_lazy();
+ purged = 1;
+ goto retry;
+ }
+ if (printk_ratelimit())
+ printk(KERN_WARNING "vmap allocation failed: "
+ "use vmalloc=<size> to increase size.\n");
+ return ERR_PTR(-EBUSY);
+ }
+
+ BUG_ON(addr & (align-1));
+
+ va->va_start = addr;
+ va->va_end = addr + size;
+ va->flags = 0;
+ __insert_vmap_area(va);
+ spin_unlock(&vmap_area_lock);
+
+ return va;
+}
+
+static void rcu_free_va(struct rcu_head *head)
+{
+ struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);
+
+ kfree(va);
+}
+
+static void __free_vmap_area(struct vmap_area *va)
+{
+ BUG_ON(RB_EMPTY_NODE(&va->rb_node));
+ rb_erase(&va->rb_node, &vmap_area_root);
+ RB_CLEAR_NODE(&va->rb_node);
+ list_del_rcu(&va->list);
+
+ call_rcu(&va->rcu_head, rcu_free_va);
+}
+
+/*
+ * Free a region of KVA allocated by alloc_vmap_area
+ */
+static void free_vmap_area(struct vmap_area *va)
+{
+ spin_lock(&vmap_area_lock);
+ __free_vmap_area(va);
+ spin_unlock(&vmap_area_lock);
+}
+
+/*
+ * Clear the pagetable entries of a given vmap_area
+ */
+static void unmap_vmap_area(struct vmap_area *va)
+{
+ vunmap_page_range(va->va_start, va->va_end);
+}
+
+/*
+ * lazy_max_pages is the maximum amount of virtual address space we gather up
+ * before attempting to purge with a TLB flush.
+ *
+ * There is a tradeoff here: a larger number will cover more kernel page tables
+ * and take slightly longer to purge, but it will linearly reduce the number of
+ * global TLB flushes that must be performed. It would seem natural to scale
+ * this number up linearly with the number of CPUs (because vmapping activity
+ * could also scale linearly with the number of CPUs), however it is likely
+ * that in practice, workloads might be constrained in other ways that mean
+ * vmap activity will not scale linearly with CPUs. Also, I want to be
+ * conservative and not introduce a big latency on huge systems, so go with
+ * a less aggressive log scale. It will still be an improvement over the old
+ * code, and it will be simple to change the scale factor if we find that it
+ * becomes a problem on bigger systems.
+ */
+static unsigned long lazy_max_pages(void)
+{
+ unsigned int log;
+
+ log = fls(num_online_cpus());
+
+ return log * (32UL * 1024 * 1024 / PAGE_SIZE);
+}
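Worked example, assuming 4 KB pages: 32 MB covers 8192 pages, and with 16 online CPUs fls(16) = 5, so up to 5 * 8192 = 40960 pages (160 MB) of lazily freed kva can accumulate before a purge is attempted; a 1024-CPU machine only raises the factor to fls(1024) = 11, i.e. 352 MB, which is the log scale the comment argues for.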
+
+static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+
+/*
+ * Purges all lazily-freed vmap areas.
+ *
+ * If sync is 0 then don't purge if there is already a purge in progress.
+ * If force_flush is 1, then flush kernel TLBs between *start and *end even
+ * if we found no lazy vmap areas to unmap (callers can use this to optimise
+ * their own TLB flushing).
+ * Returns with *start = min(*start, lowest purged address)
+ * *end = max(*end, highest purged address)
+ */
+static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
+ int sync, int force_flush)
+{
+ static DEFINE_SPINLOCK(purge_lock);
+ LIST_HEAD(valist);
+ struct vmap_area *va;
+ int nr = 0;
+
+ /*
+ * If sync is 0 but force_flush is 1, we'll go sync anyway but callers
+ * should not expect such behaviour. This just simplifies locking for
+ * the case that isn't actually used at the moment anyway.
+ */
+ if (!sync && !force_flush) {
+ if (!spin_trylock(&purge_lock))
+ return;
+ } else
+ spin_lock(&purge_lock);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(va, &vmap_area_list, list) {
+ if (va->flags & VM_LAZY_FREE) {
+ if (va->va_start < *start)
+ *start = va->va_start;
+ if (va->va_end > *end)
+ *end = va->va_end;
+ nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
+ unmap_vmap_area(va);
+ list_add_tail(&va->purge_list, &valist);
+ va->flags |= VM_LAZY_FREEING;
+ va->flags &= ~VM_LAZY_FREE;
+ }
+ }
+ rcu_read_unlock();
+
+ if (nr) {
+ BUG_ON(nr > atomic_read(&vmap_lazy_nr));
+ atomic_sub(nr, &vmap_lazy_nr);
+ }
+
+ if (nr || force_flush)
+ flush_tlb_kernel_range(*start, *end);
+
+ if (nr) {
+ spin_lock(&vmap_area_lock);
+ list_for_each_entry(va, &valist, purge_list)
+ __free_vmap_area(va);
+ spin_unlock(&vmap_area_lock);
+ }
+ spin_unlock(&purge_lock);
+}
+
+/*
+ * Kick off a purge of the outstanding lazy areas.
+ */
+static void purge_vmap_area_lazy(void)
+{
+ unsigned long start = ULONG_MAX, end = 0;
+
+ __purge_vmap_area_lazy(&start, &end, 0, 0);
+}
+
+/*
+ * Free and unmap a vmap area
+ */
+static void free_unmap_vmap_area(struct vmap_area *va)
+{
+ va->flags |= VM_LAZY_FREE;
+ atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
+ if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
+ purge_vmap_area_lazy();
+}
+
+static struct vmap_area *find_vmap_area(unsigned long addr)
+{
+ struct vmap_area *va;
+
+ spin_lock(&vmap_area_lock);
+ va = __find_vmap_area(addr);
+ spin_unlock(&vmap_area_lock);
+
+ return va;
+}
+
+static void free_unmap_vmap_area_addr(unsigned long addr)
+{
+ struct vmap_area *va;
+
+ va = find_vmap_area(addr);
+ BUG_ON(!va);
+ free_unmap_vmap_area(va);
+}
+
+
+/*** Per cpu kva allocator ***/
+
+/*
+ * vmap space is limited especially on 32 bit architectures. Ensure there is
+ * room for at least 16 percpu vmap blocks per CPU.
+ */
+/*
+ * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
+ * to #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). Guess
+ * instead (we just need a rough idea)
+ */
+#if BITS_PER_LONG == 32
+#define VMALLOC_SPACE (128UL*1024*1024)
+#else
+#define VMALLOC_SPACE (128UL*1024*1024*1024)
+#endif
+
+#define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE)
+#define VMAP_MAX_ALLOC BITS_PER_LONG /* 256K with 4K pages */
+#define VMAP_BBMAP_BITS_MAX 1024 /* 4MB with 4K pages */
+#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
+#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y)) /* can't use min() */
+#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y)) /* can't use max() */
+#define VMAP_BBMAP_BITS VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
+ VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
+ VMALLOC_PAGES / NR_CPUS / 16))
+
+#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
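Worked example for the clamp, assuming 4 KB pages: on 32-bit, VMALLOC_PAGES = 128 MB / 4 KB = 32768; with NR_CPUS = 4 that gives 32768 / 4 / 16 = 512, which falls between VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC * 2 = 64) and VMAP_BBMAP_BITS_MAX (1024), so each block covers 512 pages and VMAP_BLOCK_SIZE is 2 MB.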
+
+struct vmap_block_queue {
+ spinlock_t lock;
+ struct list_head free;
+ struct list_head dirty;
+ unsigned int nr_dirty;
+};
+
+struct vmap_block {
+ spinlock_t lock;
+ struct vmap_area *va;
+ struct vmap_block_queue *vbq;
+ unsigned long free, dirty;
+ DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
+ DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
+ union {
+ struct {
+ struct list_head free_list;
+ struct list_head dirty_list;
+ };
+ struct rcu_head rcu_head;
+ };
+};
+
+/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
+static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
+
+/*
+ * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
+ * in the free path. Could get rid of this if we change the API to return a
+ * "cookie" from alloc, to be passed to free. But no big deal yet.
+ */
+static DEFINE_SPINLOCK(vmap_block_tree_lock);
+static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
+
+/*
+ * We should probably have a fallback mechanism to allocate virtual memory
+ * out of partially filled vmap blocks. However vmap block sizing should be
+ * fairly reasonable according to the vmalloc size, so it shouldn't be a
+ * big problem.
+ */
+
+static unsigned long addr_to_vb_idx(unsigned long addr)
+{
+ addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
+ addr /= VMAP_BLOCK_SIZE;
+ return addr;
+}
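The subtraction first shifts the space so that VMALLOC_START's offset within a block cancels out, then the division collapses every VMAP_BLOCK_SIZE-sized window to one radix-tree key; every page inside a block therefore yields the same index, which is what the BUG_ON() in vb_alloc() below verifies for each allocated address.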
+
+static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
+{
+ struct vmap_block_queue *vbq;
+ struct vmap_block *vb;
+ struct vmap_area *va;
+ unsigned long vb_idx;
+ int node, err;
+
+ node = numa_node_id();
+
+ vb = kmalloc_node(sizeof(struct vmap_block),
+ gfp_mask & GFP_RECLAIM_MASK, node);
+ if (unlikely(!vb))
+ return ERR_PTR(-ENOMEM);
+
+ va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ node, gfp_mask);
+ if (unlikely(IS_ERR(va))) {
+ kfree(vb);
+ return ERR_PTR(PTR_ERR(va));
+ }
+
+ err = radix_tree_preload(gfp_mask);
+ if (unlikely(err)) {
+ kfree(vb);
+ free_vmap_area(va);
+ return ERR_PTR(err);
+ }
+
+ spin_lock_init(&vb->lock);
+ vb->va = va;
+ vb->free = VMAP_BBMAP_BITS;
+ vb->dirty = 0;
+ bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
+ bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
+ INIT_LIST_HEAD(&vb->free_list);
+ INIT_LIST_HEAD(&vb->dirty_list);
+
+ vb_idx = addr_to_vb_idx(va->va_start);
+ spin_lock(&vmap_block_tree_lock);
+ err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
+ spin_unlock(&vmap_block_tree_lock);
+ BUG_ON(err);
+ radix_tree_preload_end();
+
+ vbq = &get_cpu_var(vmap_block_queue);
+ vb->vbq = vbq;
+ spin_lock(&vbq->lock);
+ list_add(&vb->free_list, &vbq->free);
+ spin_unlock(&vbq->lock);
+ put_cpu_var(vmap_block_queue);
+
+ return vb;
+}
+
+static void rcu_free_vb(struct rcu_head *head)
+{
+ struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);
+
+ kfree(vb);
+}
+
+static void free_vmap_block(struct vmap_block *vb)
+{
+ struct vmap_block *tmp;
+ unsigned long vb_idx;
+
+ spin_lock(&vb->vbq->lock);
+ if (!list_empty(&vb->free_list))
+ list_del(&vb->free_list);
+ if (!list_empty(&vb->dirty_list))
+ list_del(&vb->dirty_list);
+ spin_unlock(&vb->vbq->lock);
+
+ vb_idx = addr_to_vb_idx(vb->va->va_start);
+ spin_lock(&vmap_block_tree_lock);
+ tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
+ spin_unlock(&vmap_block_tree_lock);
+ BUG_ON(tmp != vb);
+
+ free_unmap_vmap_area(vb->va);
+ call_rcu(&vb->rcu_head, rcu_free_vb);
+}
+
+static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+{
+ struct vmap_block_queue *vbq;
+ struct vmap_block *vb;
+ unsigned long addr = 0;
+ unsigned int order;
+
+ BUG_ON(size & ~PAGE_MASK);
+ BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+ order = get_order(size);
+
+again:
+ rcu_read_lock();
+ vbq = &get_cpu_var(vmap_block_queue);
+ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+ int i;
+
+ spin_lock(&vb->lock);
+ i = bitmap_find_free_region(vb->alloc_map,
+ VMAP_BBMAP_BITS, order);
+
+ if (i >= 0) {
+ addr = vb->va->va_start + (i << PAGE_SHIFT);
+ BUG_ON(addr_to_vb_idx(addr) !=
+ addr_to_vb_idx(vb->va->va_start));
+ vb->free -= 1UL << order;
+ if (vb->free == 0) {
+ spin_lock(&vbq->lock);
+ list_del_init(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ }
+ spin_unlock(&vb->lock);
+ break;
+ }
+ spin_unlock(&vb->lock);
+ }
+ put_cpu_var(vmap_block_queue);
+ rcu_read_unlock();
+
+ if (!addr) {
+ vb = new_vmap_block(gfp_mask);
+ if (IS_ERR(vb))
+ return vb;
+ goto again;
+ }
+
+ return (void *)addr;
+}
+
+static void vb_free(const void *addr, unsigned long size)
+{
+ unsigned long offset;
+ unsigned long vb_idx;
+ unsigned int order;
+ struct vmap_block *vb;
+
+ BUG_ON(size & ~PAGE_MASK);
+ BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+ order = get_order(size);
+
+ offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
+
+ vb_idx = addr_to_vb_idx((unsigned long)addr);
+ rcu_read_lock();
+ vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
+ rcu_read_unlock();
+ BUG_ON(!vb);
+
+ spin_lock(&vb->lock);
+ bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
+ if (!vb->dirty) {
+ spin_lock(&vb->vbq->lock);
+ list_add(&vb->dirty_list, &vb->vbq->dirty);
+ spin_unlock(&vb->vbq->lock);
+ }
+ vb->dirty += 1UL << order;
+ if (vb->dirty == VMAP_BBMAP_BITS) {
+ BUG_ON(vb->free || !list_empty(&vb->free_list));
+ spin_unlock(&vb->lock);
+ free_vmap_block(vb);
+ } else
+ spin_unlock(&vb->lock);
+}
+
+/**
+ * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
+ *
+ * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
+ * to amortize TLB flushing overheads. What this means is that any page you
+ * have now, may, in a former life, have been mapped into kernel virtual
+ * address by the vmap layer and so there might be some CPUs with TLB entries
+ * still referencing that page (additional to the regular 1:1 kernel mapping).
+ *
+ * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
+ * be sure that none of the pages we have control over will have any aliases
+ * from the vmap layer.
+ */
+void vm_unmap_aliases(void)
+{
+ unsigned long start = ULONG_MAX, end = 0;
+ int cpu;
+ int flush = 0;
+
+ for_each_possible_cpu(cpu) {
+ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+ struct vmap_block *vb;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+ int i;
+
+ spin_lock(&vb->lock);
+ i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
+ while (i < VMAP_BBMAP_BITS) {
+ unsigned long s, e;
+ int j;
+ j = find_next_zero_bit(vb->dirty_map,
+ VMAP_BBMAP_BITS, i);
+
+ s = vb->va->va_start + (i << PAGE_SHIFT);
+ e = vb->va->va_start + (j << PAGE_SHIFT);
+ vunmap_page_range(s, e);
+ flush = 1;
+
+ if (s < start)
+ start = s;
+ if (e > end)
+ end = e;
+
+ i = j;
+ i = find_next_bit(vb->dirty_map,
+ VMAP_BBMAP_BITS, i);
+ }
+ spin_unlock(&vb->lock);
+ }
+ rcu_read_unlock();
+ }
+
+ __purge_vmap_area_lazy(&start, &end, 1, flush);
+}
+EXPORT_SYMBOL_GPL(vm_unmap_aliases);
+
+/**
+ * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
+ * @mem: the pointer returned by vm_map_ram
+ * @count: the count passed to that vm_map_ram call (cannot unmap partial)
+ */
+void vm_unmap_ram(const void *mem, unsigned int count)
+{
+ unsigned long size = count << PAGE_SHIFT;
+ unsigned long addr = (unsigned long)mem;
+
+ BUG_ON(!addr);
+ BUG_ON(addr < VMALLOC_START);
+ BUG_ON(addr > VMALLOC_END);
+ BUG_ON(addr & (PAGE_SIZE-1));
+
+ debug_check_no_locks_freed(mem, size);
+
+ if (likely(count <= VMAP_MAX_ALLOC))
+ vb_free(mem, size);
+ else
+ free_unmap_vmap_area_addr(addr);
+}
+EXPORT_SYMBOL(vm_unmap_ram);
+
+/**
+ * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
+ * @pages: an array of pointers to the pages to be mapped
+ * @count: number of pages
+ * @node: prefer to allocate data structures on this node
+ * @prot: memory protection to use. PAGE_KERNEL for regular RAM
+ * @returns: a pointer to the address that has been mapped, or NULL on failure
+ */
+void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
+{
+ unsigned long size = count << PAGE_SHIFT;
unsigned long addr;
+ void *mem;
+
+ if (likely(count <= VMAP_MAX_ALLOC)) {
+ mem = vb_alloc(size, GFP_KERNEL);
+ if (IS_ERR(mem))
+ return NULL;
+ addr = (unsigned long)mem;
+ } else {
+ struct vmap_area *va;
+ va = alloc_vmap_area(size, PAGE_SIZE,
+ VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
+ if (IS_ERR(va))
+ return NULL;
+
+ addr = va->va_start;
+ mem = (void *)addr;
+ }
+ if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
+ vm_unmap_ram(mem, count);
+ return NULL;
+ }
+ return mem;
+}
+EXPORT_SYMBOL(vm_map_ram);
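A hedged usage sketch for the pair of new exports, in kernel context with error handling trimmed (pages[] and nr are the caller's; only vm_map_ram()/vm_unmap_ram() are assumed, with the signatures shown above):

	void *buf;

	buf = vm_map_ram(pages, nr, -1 /* any node */, PAGE_KERNEL);
	if (!buf)
		return -ENOMEM;
	/* ... nr << PAGE_SHIFT bytes are now linearly addressable at buf ... */
	vm_unmap_ram(buf, nr);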
+
+void __init vmalloc_init(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct vmap_block_queue *vbq;
+
+ vbq = &per_cpu(vmap_block_queue, i);
+ spin_lock_init(&vbq->lock);
+ INIT_LIST_HEAD(&vbq->free);
+ INIT_LIST_HEAD(&vbq->dirty);
+ vbq->nr_dirty = 0;
+ }
+}
+
+void unmap_kernel_range(unsigned long addr, unsigned long size)
+{
+ unsigned long end = addr + size;
+ vunmap_page_range(addr, end);
+ flush_tlb_kernel_range(addr, end);
+}
+
+int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+{
+ unsigned long addr = (unsigned long)area->addr;
+ unsigned long end = addr + area->size - PAGE_SIZE;
+ int err;
+
+ err = vmap_page_range(addr, end, prot, *pages);
+ if (err > 0) {
+ *pages += err;
+ err = 0;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(map_vm_area);
+
+/*** Old vmalloc interfaces ***/
+DEFINE_RWLOCK(vmlist_lock);
+struct vm_struct *vmlist;
+
+static struct vm_struct *__get_vm_area_node(unsigned long size,
+ unsigned long flags, unsigned long start, unsigned long end,
+ int node, gfp_t gfp_mask, void *caller)
+{
+ struct vmap_area *va;
+ struct vm_struct *area;
+ struct vm_struct *tmp, **p;
+ unsigned long align = 1;
BUG_ON(in_interrupt());
if (flags & VM_IOREMAP) {
@@ -232,13 +976,12 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
align = 1ul << bit;
}
- addr = ALIGN(start, align);
+
size = PAGE_ALIGN(size);
if (unlikely(!size))
return NULL;
area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
-
if (unlikely(!area))
return NULL;
@@ -247,48 +990,32 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
*/
size += PAGE_SIZE;
- write_lock(&vmlist_lock);
- for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
- if ((unsigned long)tmp->addr < addr) {
- if((unsigned long)tmp->addr + tmp->size >= addr)
- addr = ALIGN(tmp->size +
- (unsigned long)tmp->addr, align);
- continue;
- }
- if ((size + addr) < addr)
- goto out;
- if (size + addr <= (unsigned long)tmp->addr)
- goto found;
- addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
- if (addr > end - size)
- goto out;
+ va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
+ if (IS_ERR(va)) {
+ kfree(area);
+ return NULL;
}
- if ((size + addr) < addr)
- goto out;
- if (addr > end - size)
- goto out;
-
-found:
- area->next = *p;
- *p = area;
area->flags = flags;
- area->addr = (void *)addr;
+ area->addr = (void *)va->va_start;
area->size = size;
area->pages = NULL;
area->nr_pages = 0;
area->phys_addr = 0;
area->caller = caller;
+ va->private = area;
+ va->flags |= VM_VM_AREA;
+
+ write_lock(&vmlist_lock);
+ for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
+ if (tmp->addr >= area->addr)
+ break;
+ }
+ area->next = *p;
+ *p = area;
write_unlock(&vmlist_lock);
return area;
-
-out:
- write_unlock(&vmlist_lock);
- kfree(area);
- if (printk_ratelimit())
- printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
- return NULL;
}
struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
@@ -328,39 +1055,15 @@ struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
gfp_mask, __builtin_return_address(0));
}
-/* Caller must hold vmlist_lock */
-static struct vm_struct *__find_vm_area(const void *addr)
+static struct vm_struct *find_vm_area(const void *addr)
{
- struct vm_struct *tmp;
+ struct vmap_area *va;
- for (tmp = vmlist; tmp != NULL; tmp = tmp->next) {
- if (tmp->addr == addr)
- break;
- }
-
- return tmp;
-}
-
-/* Caller must hold vmlist_lock */
-static struct vm_struct *__remove_vm_area(const void *addr)
-{
- struct vm_struct **p, *tmp;
+ va = find_vmap_area((unsigned long)addr);
+ if (va && va->flags & VM_VM_AREA)
+ return va->private;
- for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
- if (tmp->addr == addr)
- goto found;
- }
return NULL;
-
-found:
- unmap_vm_area(tmp);
- *p = tmp->next;
-
- /*
- * Remove the guard page.
- */
- tmp->size -= PAGE_SIZE;
- return tmp;
}
/**
@@ -373,11 +1076,24 @@ found:
*/
struct vm_struct *remove_vm_area(const void *addr)
{
- struct vm_struct *v;
- write_lock(&vmlist_lock);
- v = __remove_vm_area(addr);
- write_unlock(&vmlist_lock);
- return v;
+ struct vmap_area *va;
+
+ va = find_vmap_area((unsigned long)addr);
+ if (va && va->flags & VM_VM_AREA) {
+ struct vm_struct *vm = va->private;
+ struct vm_struct *tmp, **p;
+ free_unmap_vmap_area(va);
+ vm->size -= PAGE_SIZE;
+
+ write_lock(&vmlist_lock);
+ for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
+ ;
+ *p = tmp->next;
+ write_unlock(&vmlist_lock);
+
+ return vm;
+ }
+ return NULL;
}
static void __vunmap(const void *addr, int deallocate_pages)
@@ -487,6 +1203,8 @@ void *vmap(struct page **pages, unsigned int count,
}
EXPORT_SYMBOL(vmap);
+static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
+ int node, void *caller);
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot, int node, void *caller)
{
@@ -613,10 +1331,8 @@ void *vmalloc_user(unsigned long size)
ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
if (ret) {
- write_lock(&vmlist_lock);
- area = __find_vm_area(ret);
+ area = find_vm_area(ret);
area->flags |= VM_USERMAP;
- write_unlock(&vmlist_lock);
}
return ret;
}
@@ -696,10 +1412,8 @@ void *vmalloc_32_user(unsigned long size)
ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL);
if (ret) {
- write_lock(&vmlist_lock);
- area = __find_vm_area(ret);
+ area = find_vm_area(ret);
area->flags |= VM_USERMAP;
- write_unlock(&vmlist_lock);
}
return ret;
}
@@ -800,26 +1514,25 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
struct vm_struct *area;
unsigned long uaddr = vma->vm_start;
unsigned long usize = vma->vm_end - vma->vm_start;
- int ret;
if ((PAGE_SIZE-1) & (unsigned long)addr)
return -EINVAL;
- read_lock(&vmlist_lock);
- area = __find_vm_area(addr);
+ area = find_vm_area(addr);
if (!area)
- goto out_einval_locked;
+ return -EINVAL;
if (!(area->flags & VM_USERMAP))
- goto out_einval_locked;
+ return -EINVAL;
if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
- goto out_einval_locked;
- read_unlock(&vmlist_lock);
+ return -EINVAL;
addr += pgoff << PAGE_SHIFT;
do {
struct page *page = vmalloc_to_page(addr);
+ int ret;
+
ret = vm_insert_page(vma, uaddr, page);
if (ret)
return ret;
@@ -832,11 +1545,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
/* Prevent "things" like memory migration? VM_flags need a cleanup... */
vma->vm_flags |= VM_RESERVED;
- return ret;
-
-out_einval_locked:
- read_unlock(&vmlist_lock);
- return -EINVAL;
+ return 0;
}
EXPORT_SYMBOL(remap_vmalloc_range);
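The rewritten __get_vm_area_node() above no longer scans vmlist for a free
hole (the new vmap_area allocator already picked va->va_start); it only
splices the vm_struct into vmlist in address order so list walkers still
see a sorted list. A minimal userspace model of that indirect-pointer
sorted-insert idiom (an illustrative sketch, not kernel code):

#include <stdio.h>

struct area {
	unsigned long addr;	/* start address, the sort key */
	struct area *next;
};

/* Walk the chain of next-pointer slots until the first entry whose
 * address is at or beyond the new one, then splice in front of it.
 * This mirrors the vmlist loop in the hunk above. */
static void sorted_insert(struct area **head, struct area *new)
{
	struct area **p, *tmp;

	for (p = head; (tmp = *p) != NULL; p = &tmp->next) {
		if (tmp->addr >= new->addr)
			break;
	}
	new->next = *p;
	*p = new;
}

int main(void)
{
	struct area a = { 0x3000, NULL };
	struct area b = { 0x1000, NULL };
	struct area c = { 0x2000, NULL };
	struct area *head = NULL, *t;

	sorted_insert(&head, &a);
	sorted_insert(&head, &b);
	sorted_insert(&head, &c);

	for (t = head; t; t = t->next)
		printf("%#lx\n", t->addr);	/* 0x1000 0x2000 0x3000 */
	return 0;
}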
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1ff1a58e7c1..3b5860294bb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -39,6 +39,7 @@
#include <linux/freezer.h>
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
+#include <linux/sysctl.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -78,7 +79,7 @@ struct scan_control {
unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
unsigned long *scanned, int order, int mode,
struct zone *z, struct mem_cgroup *mem_cont,
- int active);
+ int active, int file);
};
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -470,6 +471,85 @@ int remove_mapping(struct address_space *mapping, struct page *page)
return 0;
}
+/**
+ * putback_lru_page - put previously isolated page onto appropriate LRU list
+ * @page: page to be put back to appropriate lru list
+ *
+ * Add previously isolated @page to appropriate LRU list.
+ * Page may still be unevictable for other reasons.
+ *
+ * lru_lock must not be held, interrupts must be enabled.
+ */
+#ifdef CONFIG_UNEVICTABLE_LRU
+void putback_lru_page(struct page *page)
+{
+ int lru;
+ int active = !!TestClearPageActive(page);
+ int was_unevictable = PageUnevictable(page);
+
+ VM_BUG_ON(PageLRU(page));
+
+redo:
+ ClearPageUnevictable(page);
+
+ if (page_evictable(page, NULL)) {
+ /*
+ * For evictable pages, we can use the cache.
+ * In event of a race, worst case is we end up with an
+ * unevictable page on [in]active list.
+ * We know how to handle that.
+ */
+ lru = active + page_is_file_cache(page);
+ lru_cache_add_lru(page, lru);
+ } else {
+ /*
+ * Put unevictable pages directly on zone's unevictable
+ * list.
+ */
+ lru = LRU_UNEVICTABLE;
+ add_page_to_unevictable_list(page);
+ }
+ mem_cgroup_move_lists(page, lru);
+
+ /*
+ * The page's status can change while we move it among the LRU lists.
+ * If an evictable page ends up on the unevictable list, it will never
+ * be freed. To avoid that, check again after adding it to the list.
+ */
+ if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
+ if (!isolate_lru_page(page)) {
+ put_page(page);
+ goto redo;
+ }
+ /* This means someone else dropped this page from the LRU, so it
+ * will be freed or put back on the LRU again. There is nothing
+ * to do here.
+ */
+ }
+
+ if (was_unevictable && lru != LRU_UNEVICTABLE)
+ count_vm_event(UNEVICTABLE_PGRESCUED);
+ else if (!was_unevictable && lru == LRU_UNEVICTABLE)
+ count_vm_event(UNEVICTABLE_PGCULLED);
+
+ put_page(page); /* drop ref from isolate */
+}
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+
+void putback_lru_page(struct page *page)
+{
+ int lru;
+ VM_BUG_ON(PageLRU(page));
+
+ lru = !!TestClearPageActive(page) + page_is_file_cache(page);
+ lru_cache_add_lru(page, lru);
+ mem_cgroup_move_lists(page, lru);
+ put_page(page);
+}
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
+
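putback_lru_page() above picks the target list with plain arithmetic:
lru = active + page_is_file_cache(page), where page_is_file_cache() is
assumed (per this patch series) to return LRU_FILE, i.e. 2, for
file-backed pages. A standalone model of the index layout, with the
enum values copied from how I read the series' mmzone.h changes:

#include <stdio.h>

#define LRU_BASE	0
#define LRU_ACTIVE	1	/* offset for the active lists */
#define LRU_FILE	2	/* offset for the file-backed lists */

enum lru_list {
	LRU_INACTIVE_ANON = LRU_BASE,
	LRU_ACTIVE_ANON   = LRU_BASE + LRU_ACTIVE,
	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
	LRU_ACTIVE_FILE   = LRU_BASE + LRU_FILE + LRU_ACTIVE,
	LRU_UNEVICTABLE,	/* == 4 */
	NR_LRU_LISTS
};

static const char * const name[NR_LRU_LISTS] = {
	"inactive_anon", "active_anon",
	"inactive_file", "active_file",
	"unevictable",
};

int main(void)
{
	int file, active;

	/* lru = base + active + file mirrors putback_lru_page() and
	 * isolate_pages_global() in the hunks above. */
	for (file = 0; file <= 1; file++)
		for (active = 0; active <= 1; active++) {
			int lru = LRU_BASE + active * LRU_ACTIVE
					   + file * LRU_FILE;
			printf("active=%d file=%d -> %s\n",
			       active, file, name[lru]);
		}
	return 0;
}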
/*
* shrink_page_list() returns the number of reclaimed pages
*/
@@ -503,6 +583,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
sc->nr_scanned++;
+ if (unlikely(!page_evictable(page, NULL)))
+ goto cull_mlocked;
+
if (!sc->may_swap && page_mapped(page))
goto keep_locked;
@@ -539,9 +622,19 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* Anonymous process memory has backing store?
* Try to allocate it some swap space here.
*/
- if (PageAnon(page) && !PageSwapCache(page))
+ if (PageAnon(page) && !PageSwapCache(page)) {
+ switch (try_to_munlock(page)) {
+ case SWAP_FAIL: /* shouldn't happen */
+ case SWAP_AGAIN:
+ goto keep_locked;
+ case SWAP_MLOCK:
+ goto cull_mlocked;
+ case SWAP_SUCCESS:
+ ; /* fall thru'; add to swap cache */
+ }
if (!add_to_swap(page, GFP_ATOMIC))
goto activate_locked;
+ }
#endif /* CONFIG_SWAP */
mapping = page_mapping(page);
@@ -556,6 +649,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto activate_locked;
case SWAP_AGAIN:
goto keep_locked;
+ case SWAP_MLOCK:
+ goto cull_mlocked;
case SWAP_SUCCESS:
; /* try to free the page below */
}
@@ -602,7 +697,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* possible for a page to have PageDirty set, but it is actually
* clean (all its buffers are clean). This happens if the
* buffers were written out directly, with submit_bh(). ext3
- * will do this, as well as the blockdev mapping.
+ * will do this, as well as the blockdev mapping.
* try_to_release_page() will discover that cleanness and will
* drop the buffers and mark the page clean - it can be freed.
*
@@ -637,7 +732,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (!mapping || !__remove_mapping(mapping, page))
goto keep_locked;
- unlock_page(page);
+ /*
+ * At this point, we have no other references and there is
+ * no way to pick any more up (removed from LRU, removed
+ * from pagecache). Can use non-atomic bitops now (and
+ * we obviously don't have to worry about waking up a process
+ * waiting on the page lock, because there are no references.)
+ */
+ __clear_page_locked(page);
free_it:
nr_reclaimed++;
if (!pagevec_add(&freed_pvec, page)) {
@@ -646,14 +748,23 @@ free_it:
}
continue;
+cull_mlocked:
+ unlock_page(page);
+ putback_lru_page(page);
+ continue;
+
activate_locked:
+ /* Not a candidate for swapping, so reclaim swap space. */
+ if (PageSwapCache(page) && vm_swap_full())
+ remove_exclusive_swap_page_ref(page);
+ VM_BUG_ON(PageActive(page));
SetPageActive(page);
pgactivate++;
keep_locked:
unlock_page(page);
keep:
list_add(&page->lru, &ret_pages);
- VM_BUG_ON(PageLRU(page));
+ VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
}
list_splice(&ret_pages, page_list);
if (pagevec_count(&freed_pvec))
@@ -677,7 +788,7 @@ keep:
*
* returns 0 on success, -ve errno on failure.
*/
-int __isolate_lru_page(struct page *page, int mode)
+int __isolate_lru_page(struct page *page, int mode, int file)
{
int ret = -EINVAL;
@@ -693,6 +804,17 @@ int __isolate_lru_page(struct page *page, int mode)
if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
return ret;
+ if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file))
+ return ret;
+
+ /*
+ * When this function is being called for lumpy reclaim, we
+ * initially look into all LRU pages, active, inactive and
+ * unevictable; only give shrink_page_list evictable pages.
+ */
+ if (PageUnevictable(page))
+ return ret;
+
ret = -EBUSY;
if (likely(get_page_unless_zero(page))) {
/*
@@ -723,12 +845,13 @@ int __isolate_lru_page(struct page *page, int mode)
* @scanned: The number of pages that were scanned.
* @order: The caller's attempted allocation order
* @mode: One of the LRU isolation modes
+ * @file: True [1] if isolating file [!anon] pages
*
* returns how many pages were moved onto *@dst.
*/
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
struct list_head *src, struct list_head *dst,
- unsigned long *scanned, int order, int mode)
+ unsigned long *scanned, int order, int mode, int file)
{
unsigned long nr_taken = 0;
unsigned long scan;
@@ -745,7 +868,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
VM_BUG_ON(!PageLRU(page));
- switch (__isolate_lru_page(page, mode)) {
+ switch (__isolate_lru_page(page, mode, file)) {
case 0:
list_move(&page->lru, dst);
nr_taken++;
@@ -788,10 +911,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
break;
cursor_page = pfn_to_page(pfn);
+
/* Check that we have not crossed a zone boundary. */
if (unlikely(page_zone_id(cursor_page) != zone_id))
continue;
- switch (__isolate_lru_page(cursor_page, mode)) {
+ switch (__isolate_lru_page(cursor_page, mode, file)) {
case 0:
list_move(&cursor_page->lru, dst);
nr_taken++;
@@ -802,7 +926,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
/* else it is being freed elsewhere */
list_move(&cursor_page->lru, src);
default:
- break;
+ break; /* ! on LRU or wrong list */
}
}
}
@@ -816,40 +940,93 @@ static unsigned long isolate_pages_global(unsigned long nr,
unsigned long *scanned, int order,
int mode, struct zone *z,
struct mem_cgroup *mem_cont,
- int active)
+ int active, int file)
{
+ int lru = LRU_BASE;
if (active)
- return isolate_lru_pages(nr, &z->active_list, dst,
- scanned, order, mode);
- else
- return isolate_lru_pages(nr, &z->inactive_list, dst,
- scanned, order, mode);
+ lru += LRU_ACTIVE;
+ if (file)
+ lru += LRU_FILE;
+ return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
+ mode, !!file);
}
/*
* clear_active_flags() is a helper for shrink_active_list(), clearing
* any active bits from the pages in the list.
*/
-static unsigned long clear_active_flags(struct list_head *page_list)
+static unsigned long clear_active_flags(struct list_head *page_list,
+ unsigned int *count)
{
int nr_active = 0;
+ int lru;
struct page *page;
- list_for_each_entry(page, page_list, lru)
+ list_for_each_entry(page, page_list, lru) {
+ lru = page_is_file_cache(page);
if (PageActive(page)) {
+ lru += LRU_ACTIVE;
ClearPageActive(page);
nr_active++;
}
+ count[lru]++;
+ }
return nr_active;
}
+/**
+ * isolate_lru_page - tries to isolate a page from its LRU list
+ * @page: page to isolate from its LRU list
+ *
+ * Isolates a @page from an LRU list, clears PageLRU and adjusts the
+ * vmstat statistic corresponding to whatever LRU list the page was on.
+ *
+ * Returns 0 if the page was removed from an LRU list.
+ * Returns -EBUSY if the page was not on an LRU list.
+ *
+ * The returned page will have PageLRU() cleared. If it was found on
+ * the active list, it will have PageActive set. If it was found on
+ * the unevictable list, it will have the PageUnevictable bit set. That flag
+ * may need to be cleared by the caller before letting the page go.
+ *
+ * The vmstat statistic corresponding to the list on which the page was
+ * found will be decremented.
+ *
+ * Restrictions:
+ * (1) Must be called with an elevated refcount on the page. This is a
+ * fundamental difference from isolate_lru_pages (which is called
+ * without a stable reference).
+ * (2) the lru_lock must not be held.
+ * (3) interrupts must be enabled.
+ */
+int isolate_lru_page(struct page *page)
+{
+ int ret = -EBUSY;
+
+ if (PageLRU(page)) {
+ struct zone *zone = page_zone(page);
+
+ spin_lock_irq(&zone->lru_lock);
+ if (PageLRU(page) && get_page_unless_zero(page)) {
+ int lru = page_lru(page);
+ ret = 0;
+ ClearPageLRU(page);
+
+ del_page_from_lru_list(zone, page, lru);
+ }
+ spin_unlock_irq(&zone->lru_lock);
+ }
+ return ret;
+}
+
/*
* shrink_inactive_list() is a helper for shrink_zone(). It returns the number
* of reclaimed pages
*/
static unsigned long shrink_inactive_list(unsigned long max_scan,
- struct zone *zone, struct scan_control *sc)
+ struct zone *zone, struct scan_control *sc,
+ int priority, int file)
{
LIST_HEAD(page_list);
struct pagevec pvec;
@@ -866,20 +1043,43 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
unsigned long nr_scan;
unsigned long nr_freed;
unsigned long nr_active;
+ unsigned int count[NR_LRU_LISTS] = { 0, };
+ int mode = ISOLATE_INACTIVE;
+
+ /*
+ * If we need a large contiguous chunk of memory, or have
+ * trouble getting a small set of contiguous pages, we
+ * will reclaim both active and inactive pages.
+ *
+ * We use the same threshold as pageout congestion_wait below.
+ */
+ if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+ mode = ISOLATE_BOTH;
+ else if (sc->order && priority < DEF_PRIORITY - 2)
+ mode = ISOLATE_BOTH;
nr_taken = sc->isolate_pages(sc->swap_cluster_max,
- &page_list, &nr_scan, sc->order,
- (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
- ISOLATE_BOTH : ISOLATE_INACTIVE,
- zone, sc->mem_cgroup, 0);
- nr_active = clear_active_flags(&page_list);
+ &page_list, &nr_scan, sc->order, mode,
+ zone, sc->mem_cgroup, 0, file);
+ nr_active = clear_active_flags(&page_list, count);
__count_vm_events(PGDEACTIVATE, nr_active);
- __mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
- __mod_zone_page_state(zone, NR_INACTIVE,
- -(nr_taken - nr_active));
- if (scan_global_lru(sc))
+ __mod_zone_page_state(zone, NR_ACTIVE_FILE,
+ -count[LRU_ACTIVE_FILE]);
+ __mod_zone_page_state(zone, NR_INACTIVE_FILE,
+ -count[LRU_INACTIVE_FILE]);
+ __mod_zone_page_state(zone, NR_ACTIVE_ANON,
+ -count[LRU_ACTIVE_ANON]);
+ __mod_zone_page_state(zone, NR_INACTIVE_ANON,
+ -count[LRU_INACTIVE_ANON]);
+
+ if (scan_global_lru(sc)) {
zone->pages_scanned += nr_scan;
+ zone->recent_scanned[0] += count[LRU_INACTIVE_ANON];
+ zone->recent_scanned[0] += count[LRU_ACTIVE_ANON];
+ zone->recent_scanned[1] += count[LRU_INACTIVE_FILE];
+ zone->recent_scanned[1] += count[LRU_ACTIVE_FILE];
+ }
spin_unlock_irq(&zone->lru_lock);
nr_scanned += nr_scan;
@@ -899,7 +1099,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
* The attempt at page out may have made some
* of the pages active, mark them inactive again.
*/
- nr_active = clear_active_flags(&page_list);
+ nr_active = clear_active_flags(&page_list, count);
count_vm_events(PGDEACTIVATE, nr_active);
nr_freed += shrink_page_list(&page_list, sc,
@@ -924,14 +1124,24 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
* Put back any unfreeable pages.
*/
while (!list_empty(&page_list)) {
+ int lru;
page = lru_to_page(&page_list);
VM_BUG_ON(PageLRU(page));
- SetPageLRU(page);
list_del(&page->lru);
- if (PageActive(page))
- add_page_to_active_list(zone, page);
- else
- add_page_to_inactive_list(zone, page);
+ if (unlikely(!page_evictable(page, NULL))) {
+ spin_unlock_irq(&zone->lru_lock);
+ putback_lru_page(page);
+ spin_lock_irq(&zone->lru_lock);
+ continue;
+ }
+ SetPageLRU(page);
+ lru = page_lru(page);
+ add_page_to_lru_list(zone, page, lru);
+ mem_cgroup_move_lists(page, lru);
+ if (PageActive(page) && scan_global_lru(sc)) {
+ int file = !!page_is_file_cache(page);
+ zone->recent_rotated[file]++;
+ }
if (!pagevec_add(&pvec, page)) {
spin_unlock_irq(&zone->lru_lock);
__pagevec_release(&pvec);
@@ -962,115 +1172,7 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
static inline int zone_is_near_oom(struct zone *zone)
{
- return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE)
- + zone_page_state(zone, NR_INACTIVE))*3;
-}
-
-/*
- * Determine we should try to reclaim mapped pages.
- * This is called only when sc->mem_cgroup is NULL.
- */
-static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
- int priority)
-{
- long mapped_ratio;
- long distress;
- long swap_tendency;
- long imbalance;
- int reclaim_mapped = 0;
- int prev_priority;
-
- if (scan_global_lru(sc) && zone_is_near_oom(zone))
- return 1;
- /*
- * `distress' is a measure of how much trouble we're having
- * reclaiming pages. 0 -> no problems. 100 -> great trouble.
- */
- if (scan_global_lru(sc))
- prev_priority = zone->prev_priority;
- else
- prev_priority = mem_cgroup_get_reclaim_priority(sc->mem_cgroup);
-
- distress = 100 >> min(prev_priority, priority);
-
- /*
- * The point of this algorithm is to decide when to start
- * reclaiming mapped memory instead of just pagecache. Work out
- * how much memory
- * is mapped.
- */
- if (scan_global_lru(sc))
- mapped_ratio = ((global_page_state(NR_FILE_MAPPED) +
- global_page_state(NR_ANON_PAGES)) * 100) /
- vm_total_pages;
- else
- mapped_ratio = mem_cgroup_calc_mapped_ratio(sc->mem_cgroup);
-
- /*
- * Now decide how much we really want to unmap some pages. The
- * mapped ratio is downgraded - just because there's a lot of
- * mapped memory doesn't necessarily mean that page reclaim
- * isn't succeeding.
- *
- * The distress ratio is important - we don't want to start
- * going oom.
- *
- * A 100% value of vm_swappiness overrides this algorithm
- * altogether.
- */
- swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
-
- /*
- * If there's huge imbalance between active and inactive
- * (think active 100 times larger than inactive) we should
- * become more permissive, or the system will take too much
- * cpu before it start swapping during memory pressure.
- * Distress is about avoiding early-oom, this is about
- * making swappiness graceful despite setting it to low
- * values.
- *
- * Avoid div by zero with nr_inactive+1, and max resulting
- * value is vm_total_pages.
- */
- if (scan_global_lru(sc)) {
- imbalance = zone_page_state(zone, NR_ACTIVE);
- imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
- } else
- imbalance = mem_cgroup_reclaim_imbalance(sc->mem_cgroup);
-
- /*
- * Reduce the effect of imbalance if swappiness is low,
- * this means for a swappiness very low, the imbalance
- * must be much higher than 100 for this logic to make
- * the difference.
- *
- * Max temporary value is vm_total_pages*100.
- */
- imbalance *= (vm_swappiness + 1);
- imbalance /= 100;
-
- /*
- * If not much of the ram is mapped, makes the imbalance
- * less relevant, it's high priority we refill the inactive
- * list with mapped pages only in presence of high ratio of
- * mapped pages.
- *
- * Max temporary value is vm_total_pages*100.
- */
- imbalance *= mapped_ratio;
- imbalance /= 100;
-
- /* apply imbalance feedback to swap_tendency */
- swap_tendency += imbalance;
-
- /*
- * Now use this metric to decide whether to start moving mapped
- * memory onto the inactive list.
- */
- if (swap_tendency >= 100)
- reclaim_mapped = 1;
-
- return reclaim_mapped;
+ return zone->pages_scanned >= (zone_lru_pages(zone) * 3);
}
/*
@@ -1093,53 +1195,71 @@ static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
- struct scan_control *sc, int priority)
+ struct scan_control *sc, int priority, int file)
{
unsigned long pgmoved;
int pgdeactivate = 0;
unsigned long pgscanned;
LIST_HEAD(l_hold); /* The pages which were snipped off */
- LIST_HEAD(l_inactive); /* Pages to go onto the inactive_list */
- LIST_HEAD(l_active); /* Pages to go onto the active_list */
+ LIST_HEAD(l_inactive);
struct page *page;
struct pagevec pvec;
- int reclaim_mapped = 0;
-
- if (sc->may_swap)
- reclaim_mapped = calc_reclaim_mapped(sc, zone, priority);
+ enum lru_list lru;
lru_add_drain();
spin_lock_irq(&zone->lru_lock);
pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
ISOLATE_ACTIVE, zone,
- sc->mem_cgroup, 1);
+ sc->mem_cgroup, 1, file);
/*
* zone->pages_scanned is used to detect a zone's OOM state;
* mem_cgroup tracks nr_scan by itself.
*/
- if (scan_global_lru(sc))
+ if (scan_global_lru(sc)) {
zone->pages_scanned += pgscanned;
+ zone->recent_scanned[!!file] += pgmoved;
+ }
- __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
+ if (file)
+ __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
+ else
+ __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
spin_unlock_irq(&zone->lru_lock);
+ pgmoved = 0;
while (!list_empty(&l_hold)) {
cond_resched();
page = lru_to_page(&l_hold);
list_del(&page->lru);
- if (page_mapped(page)) {
- if (!reclaim_mapped ||
- (total_swap_pages == 0 && PageAnon(page)) ||
- page_referenced(page, 0, sc->mem_cgroup)) {
- list_add(&page->lru, &l_active);
- continue;
- }
+
+ if (unlikely(!page_evictable(page, NULL))) {
+ putback_lru_page(page);
+ continue;
}
+
+ /* page_referenced clears PageReferenced */
+ if (page_mapping_inuse(page) &&
+ page_referenced(page, 0, sc->mem_cgroup))
+ pgmoved++;
+
list_add(&page->lru, &l_inactive);
}
+ /*
+ * Count referenced pages from currently used mappings as
+ * rotated, even though they are moved to the inactive list.
+ * This helps balance scan pressure between file and anonymous
+ * pages in get_scan_ratio.
+ */
+ zone->recent_rotated[!!file] += pgmoved;
+
+ /*
+ * Move the pages to the [file or anon] inactive list.
+ */
pagevec_init(&pvec, 1);
+
pgmoved = 0;
+ lru = LRU_BASE + file * LRU_FILE;
spin_lock_irq(&zone->lru_lock);
while (!list_empty(&l_inactive)) {
page = lru_to_page(&l_inactive);
@@ -1149,11 +1269,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
VM_BUG_ON(!PageActive(page));
ClearPageActive(page);
- list_move(&page->lru, &zone->inactive_list);
- mem_cgroup_move_lists(page, false);
+ list_move(&page->lru, &zone->lru[lru].list);
+ mem_cgroup_move_lists(page, lru);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
- __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
+ __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
spin_unlock_irq(&zone->lru_lock);
pgdeactivate += pgmoved;
pgmoved = 0;
@@ -1163,104 +1283,189 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
spin_lock_irq(&zone->lru_lock);
}
}
- __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
+ __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
pgdeactivate += pgmoved;
if (buffer_heads_over_limit) {
spin_unlock_irq(&zone->lru_lock);
pagevec_strip(&pvec);
spin_lock_irq(&zone->lru_lock);
}
-
- pgmoved = 0;
- while (!list_empty(&l_active)) {
- page = lru_to_page(&l_active);
- prefetchw_prev_lru_page(page, &l_active, flags);
- VM_BUG_ON(PageLRU(page));
- SetPageLRU(page);
- VM_BUG_ON(!PageActive(page));
-
- list_move(&page->lru, &zone->active_list);
- mem_cgroup_move_lists(page, true);
- pgmoved++;
- if (!pagevec_add(&pvec, page)) {
- __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
- pgmoved = 0;
- spin_unlock_irq(&zone->lru_lock);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
- }
- }
- __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
-
__count_zone_vm_events(PGREFILL, zone, pgscanned);
__count_vm_events(PGDEACTIVATE, pgdeactivate);
spin_unlock_irq(&zone->lru_lock);
+ if (vm_swap_full())
+ pagevec_swap_free(&pvec);
pagevec_release(&pvec);
}
+static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
+ struct zone *zone, struct scan_control *sc, int priority)
+{
+ int file = is_file_lru(lru);
+
+ if (lru == LRU_ACTIVE_FILE) {
+ shrink_active_list(nr_to_scan, zone, sc, priority, file);
+ return 0;
+ }
+
+ if (lru == LRU_ACTIVE_ANON &&
+ (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
+ shrink_active_list(nr_to_scan, zone, sc, priority, file);
+ return 0;
+ }
+ return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
+}
+
+/*
+ * Determine how aggressively the anon and file LRU lists should be
+ * scanned. The relative value of each set of LRU lists is determined
+ * by looking at the fraction of the pages scanned we did rotate back
+ * onto the active list instead of evict.
+ *
+ * percent[0] specifies how much pressure to put on ram/swap backed
+ * memory, while percent[1] determines pressure on the file LRUs.
+ */
+static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
+ unsigned long *percent)
+{
+ unsigned long anon, file, free;
+ unsigned long anon_prio, file_prio;
+ unsigned long ap, fp;
+
+ anon = zone_page_state(zone, NR_ACTIVE_ANON) +
+ zone_page_state(zone, NR_INACTIVE_ANON);
+ file = zone_page_state(zone, NR_ACTIVE_FILE) +
+ zone_page_state(zone, NR_INACTIVE_FILE);
+ free = zone_page_state(zone, NR_FREE_PAGES);
+
+ /* If we have no swap space, do not bother scanning anon pages. */
+ if (nr_swap_pages <= 0) {
+ percent[0] = 0;
+ percent[1] = 100;
+ return;
+ }
+
+ /* If we have very few page cache pages, force-scan anon pages. */
+ if (unlikely(file + free <= zone->pages_high)) {
+ percent[0] = 100;
+ percent[1] = 0;
+ return;
+ }
+
+ /*
+ * OK, so we have swap space and a fair amount of page cache
+ * pages. We use the recently rotated / recently scanned
+ * ratios to determine how valuable each cache is.
+ *
+ * Because workloads change over time (and to avoid overflow)
+ * we keep these statistics as a floating average, which ends
+ * up weighing recent references more than old ones.
+ *
+ * anon in [0], file in [1]
+ */
+ if (unlikely(zone->recent_scanned[0] > anon / 4)) {
+ spin_lock_irq(&zone->lru_lock);
+ zone->recent_scanned[0] /= 2;
+ zone->recent_rotated[0] /= 2;
+ spin_unlock_irq(&zone->lru_lock);
+ }
+
+ if (unlikely(zone->recent_scanned[1] > file / 4)) {
+ spin_lock_irq(&zone->lru_lock);
+ zone->recent_scanned[1] /= 2;
+ zone->recent_rotated[1] /= 2;
+ spin_unlock_irq(&zone->lru_lock);
+ }
+
+ /*
+ * With swappiness at 100, anonymous and file have the same priority.
+ * This scanning priority is essentially the inverse of IO cost.
+ */
+ anon_prio = sc->swappiness;
+ file_prio = 200 - sc->swappiness;
+
+ /*
+ * anon recent_rotated[0]
+ * %anon = 100 * ----------- / ----------------- * IO cost
+ * anon + file rotate_sum
+ */
+ ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
+ ap /= zone->recent_rotated[0] + 1;
+
+ fp = (file_prio + 1) * (zone->recent_scanned[1] + 1);
+ fp /= zone->recent_rotated[1] + 1;
+
+ /* Normalize to percentages */
+ percent[0] = 100 * ap / (ap + fp + 1);
+ percent[1] = 100 - percent[0];
+}
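The ap/fp computation above is pure integer arithmetic, so a worked
example helps. This userspace re-run of the formula uses invented zone
statistics (swappiness 60, file pages rotating back far less often than
anon pages) and is illustrative only:

#include <stdio.h>

int main(void)
{
	unsigned long swappiness = 60;			/* sc->swappiness */
	unsigned long anon_prio = swappiness;		/* 60 */
	unsigned long file_prio = 200 - swappiness;	/* 140 */

	/* anon in [0], file in [1]; the numbers are invented */
	unsigned long recent_scanned[2] = { 1000, 4000 };
	unsigned long recent_rotated[2] = {  800,  500 };

	/* A cache whose scanned pages mostly rotate back is expensive
	 * to reclaim from, so it ends up with a smaller share. */
	unsigned long ap = (anon_prio + 1) * (recent_scanned[0] + 1)
				/ (recent_rotated[0] + 1);
	unsigned long fp = (file_prio + 1) * (recent_scanned[1] + 1)
				/ (recent_rotated[1] + 1);
	unsigned long anon_pct = 100 * ap / (ap + fp + 1);

	/* With these numbers: ap=76, fp=1126 -> anon 6%, file 94%. */
	printf("anon %lu%%, file %lu%%\n", anon_pct, 100 - anon_pct);
	return 0;
}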
+
+
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
static unsigned long shrink_zone(int priority, struct zone *zone,
struct scan_control *sc)
{
- unsigned long nr_active;
- unsigned long nr_inactive;
+ unsigned long nr[NR_LRU_LISTS];
unsigned long nr_to_scan;
unsigned long nr_reclaimed = 0;
+ unsigned long percent[2]; /* anon @ 0; file @ 1 */
+ enum lru_list l;
- if (scan_global_lru(sc)) {
- /*
- * Add one to nr_to_scan just to make sure that the kernel
- * will slowly sift through the active list.
- */
- zone->nr_scan_active +=
- (zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
- nr_active = zone->nr_scan_active;
- zone->nr_scan_inactive +=
- (zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
- nr_inactive = zone->nr_scan_inactive;
- if (nr_inactive >= sc->swap_cluster_max)
- zone->nr_scan_inactive = 0;
- else
- nr_inactive = 0;
-
- if (nr_active >= sc->swap_cluster_max)
- zone->nr_scan_active = 0;
- else
- nr_active = 0;
- } else {
- /*
- * This reclaim occurs not because zone memory shortage but
- * because memory controller hits its limit.
- * Then, don't modify zone reclaim related data.
- */
- nr_active = mem_cgroup_calc_reclaim_active(sc->mem_cgroup,
- zone, priority);
-
- nr_inactive = mem_cgroup_calc_reclaim_inactive(sc->mem_cgroup,
- zone, priority);
- }
+ get_scan_ratio(zone, sc, percent);
+ for_each_evictable_lru(l) {
+ if (scan_global_lru(sc)) {
+ int file = is_file_lru(l);
+ int scan;
- while (nr_active || nr_inactive) {
- if (nr_active) {
- nr_to_scan = min(nr_active,
- (unsigned long)sc->swap_cluster_max);
- nr_active -= nr_to_scan;
- shrink_active_list(nr_to_scan, zone, sc, priority);
+ scan = zone_page_state(zone, NR_LRU_BASE + l);
+ if (priority) {
+ scan >>= priority;
+ scan = (scan * percent[file]) / 100;
+ }
+ zone->lru[l].nr_scan += scan;
+ nr[l] = zone->lru[l].nr_scan;
+ if (nr[l] >= sc->swap_cluster_max)
+ zone->lru[l].nr_scan = 0;
+ else
+ nr[l] = 0;
+ } else {
+ /*
+ * This reclaim occurs not because zone memory shortage
+ * but because memory controller hits its limit.
+ * Don't modify zone reclaim related data.
+ */
+ nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone,
+ priority, l);
}
+ }
- if (nr_inactive) {
- nr_to_scan = min(nr_inactive,
+ while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+ nr[LRU_INACTIVE_FILE]) {
+ for_each_evictable_lru(l) {
+ if (nr[l]) {
+ nr_to_scan = min(nr[l],
(unsigned long)sc->swap_cluster_max);
- nr_inactive -= nr_to_scan;
- nr_reclaimed += shrink_inactive_list(nr_to_scan, zone,
- sc);
+ nr[l] -= nr_to_scan;
+
+ nr_reclaimed += shrink_list(l, nr_to_scan,
+ zone, sc, priority);
+ }
}
}
+ /*
+ * Even if we did not try to evict anon pages at all, we want to
+ * rebalance the anon lru active/inactive ratio.
+ */
+ if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
+ shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+
throttle_vm_writeout(sc->gfp_mask);
return nr_reclaimed;
}
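The per-LRU scan-target bookkeeping in shrink_zone() above also reads
better with numbers. The model below replays the loop for a single LRU
list across the priority ladder; DEF_PRIORITY of 12 matches kernels of
this era, the other constants are invented:

#include <stdio.h>

int main(void)
{
	unsigned long list_pages = 1UL << 20;	/* pages on this LRU */
	unsigned long percent = 94;		/* from get_scan_ratio() */
	unsigned long cluster_max = 32;		/* sc->swap_cluster_max */
	unsigned long nr_scan = 0;		/* zone->lru[l].nr_scan */
	int priority;

	for (priority = 12; priority >= 0; priority--) {
		unsigned long scan = list_pages;
		unsigned long nr;

		if (priority) {
			scan >>= priority;
			scan = scan * percent / 100;
		}
		nr_scan += scan;	/* credit accumulates across calls */

		nr = nr_scan;
		if (nr >= cluster_max)
			nr_scan = 0;	/* large enough: spend the credit */
		else
			nr = 0;		/* too small: defer, keep credit */

		printf("priority %2d: scan target %lu pages\n",
		       priority, nr);
	}
	return 0;
}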
@@ -1321,7 +1526,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
return nr_reclaimed;
}
-
+
/*
* This is the main entry point to direct page reclaim.
*
@@ -1364,8 +1569,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue;
- lru_pages += zone_page_state(zone, NR_ACTIVE)
- + zone_page_state(zone, NR_INACTIVE);
+ lru_pages += zone_lru_pages(zone);
}
}
@@ -1555,6 +1759,14 @@ loop_again:
priority != DEF_PRIORITY)
continue;
+ /*
+ * Do some background aging of the anon list, to give
+ * pages a chance to be referenced before reclaiming.
+ */
+ if (inactive_anon_is_low(zone))
+ shrink_active_list(SWAP_CLUSTER_MAX, zone,
+ &sc, priority, 0);
+
if (!zone_watermark_ok(zone, order, zone->pages_high,
0, 0)) {
end_zone = i;
@@ -1567,8 +1779,7 @@ loop_again:
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
- lru_pages += zone_page_state(zone, NR_ACTIVE)
- + zone_page_state(zone, NR_INACTIVE);
+ lru_pages += zone_lru_pages(zone);
}
/*
@@ -1612,8 +1823,7 @@ loop_again:
if (zone_is_all_unreclaimable(zone))
continue;
if (nr_slab == 0 && zone->pages_scanned >=
- (zone_page_state(zone, NR_ACTIVE)
- + zone_page_state(zone, NR_INACTIVE)) * 6)
+ (zone_lru_pages(zone) * 6))
zone_set_flag(zone,
ZONE_ALL_UNRECLAIMABLE);
/*
@@ -1667,7 +1877,7 @@ out:
/*
* The background pageout daemon, started as a kernel thread
- * from the init process.
+ * from the init process.
*
* This basically trickles out pages so that we have _some_
* free memory available even if there is no other activity
@@ -1761,6 +1971,14 @@ void wakeup_kswapd(struct zone *zone, int order)
wake_up_interruptible(&pgdat->kswapd_wait);
}
+unsigned long global_lru_pages(void)
+{
+ return global_page_state(NR_ACTIVE_ANON)
+ + global_page_state(NR_ACTIVE_FILE)
+ + global_page_state(NR_INACTIVE_ANON)
+ + global_page_state(NR_INACTIVE_FILE);
+}
+
#ifdef CONFIG_PM
/*
* Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages
@@ -1774,6 +1992,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
{
struct zone *zone;
unsigned long nr_to_scan, ret = 0;
+ enum lru_list l;
for_each_zone(zone) {
@@ -1783,38 +2002,31 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
continue;
- /* For pass = 0 we don't shrink the active list */
- if (pass > 0) {
- zone->nr_scan_active +=
- (zone_page_state(zone, NR_ACTIVE) >> prio) + 1;
- if (zone->nr_scan_active >= nr_pages || pass > 3) {
- zone->nr_scan_active = 0;
+ for_each_evictable_lru(l) {
+ /* For pass = 0, we don't shrink the active list */
+ if (pass == 0 &&
+ (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE))
+ continue;
+
+ zone->lru[l].nr_scan +=
+ (zone_page_state(zone, NR_LRU_BASE + l)
+ >> prio) + 1;
+ if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
+ zone->lru[l].nr_scan = 0;
nr_to_scan = min(nr_pages,
- zone_page_state(zone, NR_ACTIVE));
- shrink_active_list(nr_to_scan, zone, sc, prio);
+ zone_page_state(zone,
+ NR_LRU_BASE + l));
+ ret += shrink_list(l, nr_to_scan, zone,
+ sc, prio);
+ if (ret >= nr_pages)
+ return ret;
}
}
-
- zone->nr_scan_inactive +=
- (zone_page_state(zone, NR_INACTIVE) >> prio) + 1;
- if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
- zone->nr_scan_inactive = 0;
- nr_to_scan = min(nr_pages,
- zone_page_state(zone, NR_INACTIVE));
- ret += shrink_inactive_list(nr_to_scan, zone, sc);
- if (ret >= nr_pages)
- return ret;
- }
}
return ret;
}
-static unsigned long count_lru_pages(void)
-{
- return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
-}
-
/*
* Try to free `nr_pages' of memory, system-wide, and return the number of
* freed pages.
@@ -1840,7 +2052,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
current->reclaim_state = &reclaim_state;
- lru_pages = count_lru_pages();
+ lru_pages = global_lru_pages();
nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
/* If slab caches are huge, it's better to hit them first */
while (nr_slab >= lru_pages) {
@@ -1883,7 +2095,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
reclaim_state.reclaimed_slab = 0;
shrink_slab(sc.nr_scanned, sc.gfp_mask,
- count_lru_pages());
+ global_lru_pages());
ret += reclaim_state.reclaimed_slab;
if (ret >= nr_pages)
goto out;
@@ -1900,7 +2112,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
if (!ret) {
do {
reclaim_state.reclaimed_slab = 0;
- shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages());
+ shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
ret += reclaim_state.reclaimed_slab;
} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
}
@@ -2128,3 +2340,285 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
return ret;
}
#endif
+
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * page_evictable - test whether a page is evictable
+ * @page: the page to test
+ * @vma: the VMA in which the page is or will be mapped, may be NULL
+ *
+ * Test whether page is evictable--i.e., should be placed on active/inactive
+ * lists vs unevictable list. The vma argument is !NULL when called from the
+ * fault path to determine how to instantiate a new page.
+ *
+ * Reasons page might not be evictable:
+ * (1) page's mapping marked unevictable
+ * (2) page is part of an mlocked VMA
+ *
+ */
+int page_evictable(struct page *page, struct vm_area_struct *vma)
+{
+
+ if (mapping_unevictable(page_mapping(page)))
+ return 0;
+
+ if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
+ return 0;
+
+ return 1;
+}
+
+static void show_page_path(struct page *page)
+{
+ char buf[256];
+ if (page_is_file_cache(page)) {
+ struct address_space *mapping = page->mapping;
+ struct dentry *dentry;
+ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+ spin_lock(&mapping->i_mmap_lock);
+ dentry = d_find_alias(mapping->host);
+ printk(KERN_INFO "rescued: %s %lu\n",
+ dentry_path(dentry, buf, 256), pgoff);
+ spin_unlock(&mapping->i_mmap_lock);
+ } else {
+#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU)
+ struct anon_vma *anon_vma;
+ struct vm_area_struct *vma;
+
+ anon_vma = page_lock_anon_vma(page);
+ if (!anon_vma)
+ return;
+
+ list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+ printk(KERN_INFO "rescued: anon %s\n",
+ vma->vm_mm->owner->comm);
+ break;
+ }
+ page_unlock_anon_vma(anon_vma);
+#endif
+ }
+}
+
+
+/**
+ * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
+ * @page: page to check evictability and move to appropriate lru list
+ * @zone: zone page is in
+ *
+ * Checks a page for evictability and moves the page to the appropriate
+ * zone lru list.
+ *
+ * Restrictions: zone->lru_lock must be held, page must be on LRU and must
+ * have PageUnevictable set.
+ */
+static void check_move_unevictable_page(struct page *page, struct zone *zone)
+{
+ VM_BUG_ON(PageActive(page));
+
+retry:
+ ClearPageUnevictable(page);
+ if (page_evictable(page, NULL)) {
+ enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
+
+ show_page_path(page);
+
+ __dec_zone_state(zone, NR_UNEVICTABLE);
+ list_move(&page->lru, &zone->lru[l].list);
+ __inc_zone_state(zone, NR_INACTIVE_ANON + l);
+ __count_vm_event(UNEVICTABLE_PGRESCUED);
+ } else {
+ /*
+ * rotate unevictable list
+ */
+ SetPageUnevictable(page);
+ list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+ if (page_evictable(page, NULL))
+ goto retry;
+ }
+}
+
+/**
+ * scan_mapping_unevictable_pages - scan an address space for evictable pages
+ * @mapping: struct address_space to scan for evictable pages
+ *
+ * Scan all pages in mapping. Check unevictable pages for
+ * evictability and move them to the appropriate zone lru list.
+ */
+void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+ pgoff_t next = 0;
+ pgoff_t end = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
+ struct zone *zone;
+ struct pagevec pvec;
+
+ if (mapping->nrpages == 0)
+ return;
+
+ pagevec_init(&pvec, 0);
+ while (next < end &&
+ pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+ int i;
+ int pg_scanned = 0;
+
+ zone = NULL;
+
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
+ pgoff_t page_index = page->index;
+ struct zone *pagezone = page_zone(page);
+
+ pg_scanned++;
+ if (page_index > next)
+ next = page_index;
+ next++;
+
+ if (pagezone != zone) {
+ if (zone)
+ spin_unlock_irq(&zone->lru_lock);
+ zone = pagezone;
+ spin_lock_irq(&zone->lru_lock);
+ }
+
+ if (PageLRU(page) && PageUnevictable(page))
+ check_move_unevictable_page(page, zone);
+ }
+ if (zone)
+ spin_unlock_irq(&zone->lru_lock);
+ pagevec_release(&pvec);
+
+ count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
+ }
+
+}
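scan_mapping_unevictable_pages() above takes only one zone's lru_lock at
a time and swaps locks when consecutive pagevec entries fall in
different zones. A small model of that lock-batching pattern, with stub
locks that just count acquisitions (illustrative only):

#include <stdio.h>

#define NITEMS 8

struct zone { int lock_taken; };

static void lock_zone(struct zone *z)   { z->lock_taken++; }
static void unlock_zone(struct zone *z) { (void)z; }

int main(void)
{
	struct zone zones[2] = { { 0 }, { 0 } };
	/* which zone each "page" in the batch belongs to */
	int page_zone[NITEMS] = { 0, 0, 0, 1, 1, 0, 1, 1 };
	struct zone *zone = NULL;
	int i;

	for (i = 0; i < NITEMS; i++) {
		struct zone *pagezone = &zones[page_zone[i]];

		if (pagezone != zone) {	/* zone changed: swap locks */
			if (zone)
				unlock_zone(zone);
			zone = pagezone;
			lock_zone(zone);
		}
		/* ...check_move_unevictable_page() would run here... */
	}
	if (zone)
		unlock_zone(zone);

	/* 8 pages, but each zone was only locked twice. */
	printf("zone0 locked %d times, zone1 locked %d times\n",
	       zones[0].lock_taken, zones[1].lock_taken);
	return 0;
}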
+
+/**
+ * scan_zone_unevictable_pages - check unevictable list for evictable pages
+ * @zone: zone of which to scan the unevictable list
+ *
+ * Scan @zone's unevictable LRU lists to check for pages that have become
+ * evictable. Move those that have to @zone's inactive list where they
+ * become candidates for reclaim, unless shrink_inactive_zone() decides
+ * to reactivate them. Pages that are still unevictable are rotated
+ * back onto @zone's unevictable list.
+ */
+#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */
+void scan_zone_unevictable_pages(struct zone *zone)
+{
+ struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
+ unsigned long scan;
+ unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE);
+
+ while (nr_to_scan > 0) {
+ unsigned long batch_size = min(nr_to_scan,
+ SCAN_UNEVICTABLE_BATCH_SIZE);
+
+ spin_lock_irq(&zone->lru_lock);
+ for (scan = 0; scan < batch_size; scan++) {
+ struct page *page = lru_to_page(l_unevictable);
+
+ if (!trylock_page(page))
+ continue;
+
+ prefetchw_prev_lru_page(page, l_unevictable, flags);
+
+ if (likely(PageLRU(page) && PageUnevictable(page)))
+ check_move_unevictable_page(page, zone);
+
+ unlock_page(page);
+ }
+ spin_unlock_irq(&zone->lru_lock);
+
+ nr_to_scan -= batch_size;
+ }
+}
+
+
+/**
+ * scan_all_zones_unevictable_pages - scan all unevictable lists for evictable pages
+ *
+ * A really big hammer: scan all zones' unevictable LRU lists to check for
+ * pages that have become evictable. Move those back to the zones'
+ * inactive list where they become candidates for reclaim.
+ * This occurs when, e.g., we have unswappable pages on the unevictable
+ * lists and then add swap to the system. As such, it runs in the context
+ * of a task that has likely made some previously unevictable pages
+ * evictable.
+ */
+void scan_all_zones_unevictable_pages(void)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ scan_zone_unevictable_pages(zone);
+ }
+}
+
+/*
+ * scan_unevictable_pages [vm] sysctl handler. On demand re-scan of
+ * all nodes' unevictable lists for evictable pages
+ */
+unsigned long scan_unevictable_pages;
+
+int scan_unevictable_handler(struct ctl_table *table, int write,
+ struct file *file, void __user *buffer,
+ size_t *length, loff_t *ppos)
+{
+ proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+
+ if (write && *(unsigned long *)table->data)
+ scan_all_zones_unevictable_pages();
+
+ scan_unevictable_pages = 0;
+ return 0;
+}
+
+/*
+ * per node 'scan_unevictable_pages' attribute. On demand re-scan of
+ * a specified node's per zone unevictable lists for evictable pages.
+ */
+
+static ssize_t read_scan_unevictable_node(struct sys_device *dev,
+ struct sysdev_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "0\n"); /* always zero; should fit... */
+}
+
+static ssize_t write_scan_unevictable_node(struct sys_device *dev,
+ struct sysdev_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
+ struct zone *zone;
+ unsigned long res;
+ int err = strict_strtoul(buf, 10, &res);
+
+ if (err || !res)
+ return 1; /* parse error, or zero is a no-op */
+
+ for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
+ if (!populated_zone(zone))
+ continue;
+ scan_zone_unevictable_pages(zone);
+ }
+ return 1;
+}
+
+
+static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
+ read_scan_unevictable_node,
+ write_scan_unevictable_node);
+
+int scan_unevictable_register_node(struct node *node)
+{
+ return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
+}
+
+void scan_unevictable_unregister_node(struct node *node)
+{
+ sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
+}
+
+#endif
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d7826af2fb0..9343227c5c6 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -619,8 +619,14 @@ const struct seq_operations pagetypeinfo_op = {
static const char * const vmstat_text[] = {
/* Zoned VM counters */
"nr_free_pages",
- "nr_inactive",
- "nr_active",
+ "nr_inactive_anon",
+ "nr_active_anon",
+ "nr_inactive_file",
+ "nr_active_file",
+#ifdef CONFIG_UNEVICTABLE_LRU
+ "nr_unevictable",
+ "nr_mlock",
+#endif
"nr_anon_pages",
"nr_mapped",
"nr_file_pages",
@@ -675,6 +681,16 @@ static const char * const vmstat_text[] = {
"htlb_buddy_alloc_success",
"htlb_buddy_alloc_fail",
#endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+ "unevictable_pgs_culled",
+ "unevictable_pgs_scanned",
+ "unevictable_pgs_rescued",
+ "unevictable_pgs_mlocked",
+ "unevictable_pgs_munlocked",
+ "unevictable_pgs_cleared",
+ "unevictable_pgs_stranded",
+ "unevictable_pgs_mlockfreed",
+#endif
#endif
};
@@ -688,7 +704,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
"\n min %lu"
"\n low %lu"
"\n high %lu"
- "\n scanned %lu (a: %lu i: %lu)"
+ "\n scanned %lu (aa: %lu ia: %lu af: %lu if: %lu)"
"\n spanned %lu"
"\n present %lu",
zone_page_state(zone, NR_FREE_PAGES),
@@ -696,7 +712,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
zone->pages_low,
zone->pages_high,
zone->pages_scanned,
- zone->nr_scan_active, zone->nr_scan_inactive,
+ zone->lru[LRU_ACTIVE_ANON].nr_scan,
+ zone->lru[LRU_INACTIVE_ANON].nr_scan,
+ zone->lru[LRU_ACTIVE_FILE].nr_scan,
+ zone->lru[LRU_INACTIVE_FILE].nr_scan,
zone->spanned_pages,
zone->present_pages);
@@ -733,10 +752,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
seq_printf(m,
"\n all_unreclaimable: %u"
"\n prev_priority: %i"
- "\n start_pfn: %lu",
+ "\n start_pfn: %lu"
+ "\n inactive_ratio: %u",
zone_is_all_unreclaimable(zone),
zone->prev_priority,
- zone->zone_start_pfn);
+ zone->zone_start_pfn,
+ zone->inactive_ratio);
seq_putc(m, '\n');
}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index a4abed5b4c4..fa5cda4e552 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -719,7 +719,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
return NF_ACCEPT;
}
*d = (struct net_device *)in;
- NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
+ NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
(struct net_device *)out, br_nf_forward_finish);
return NF_STOLEN;
diff --git a/net/core/dev.c b/net/core/dev.c
index 868ec0ba8b7..b8a4fd0806a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -924,10 +924,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
strlcpy(dev->name, newname, IFNAMSIZ);
rollback:
- err = device_rename(&dev->dev, dev->name);
- if (err) {
+ ret = device_rename(&dev->dev, dev->name);
+ if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
- return err;
+ return ret;
}
write_lock_bh(&dev_base_lock);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 11062780bb0..d4ce1224e00 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -259,7 +259,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
fl.fl6_flowlabel = 0;
fl.oif = ireq6->iif;
fl.fl_ip_dport = inet_rsk(req)->rmt_port;
- fl.fl_ip_sport = inet_sk(sk)->sport;
+ fl.fl_ip_sport = inet_rsk(req)->loc_port;
security_req_classify_flow(req, &fl);
opt = np->opt;
@@ -558,7 +558,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
fl.oif = sk->sk_bound_dev_if;
fl.fl_ip_dport = inet_rsk(req)->rmt_port;
- fl.fl_ip_sport = inet_sk(sk)->sport;
+ fl.fl_ip_sport = inet_rsk(req)->loc_port;
security_sk_classify_flow(sk, &fl);
if (ip6_dst_lookup(sk, &dst, &fl))
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index b2804e2d1b8..e6bf99e3e41 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -309,6 +309,7 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
struct dccp_request_sock *dreq = dccp_rsk(req);
inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
+ inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport;
inet_rsk(req)->acked = 0;
req->rcv_wnd = sysctl_dccp_feat_sequence_window;
dreq->dreq_timestamp_echo = 0;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d06945c7d3d..809d803d500 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -347,7 +347,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
/* Build and checksum header */
dh = dccp_zeroed_hdr(skb, dccp_header_size);
- dh->dccph_sport = inet_sk(sk)->sport;
+ dh->dccph_sport = inet_rsk(req)->loc_port;
dh->dccph_dport = inet_rsk(req)->rmt_port;
dh->dccph_doff = (dccp_header_size +
DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b043eda60b0..1a9dd66511f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -663,7 +663,7 @@ out:
void arp_xmit(struct sk_buff *skb)
{
/* Send it off, maybe filter it using firewalling first. */
- NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
+ NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
}
/*
@@ -928,7 +928,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
- return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
+ return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
freeskb:
kfree_skb(skb);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ffeaffc3fff..8303e4b406c 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -742,6 +742,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
*obj = kmalloc(sizeof(struct snmp_object) + len,
GFP_ATOMIC);
if (*obj == NULL) {
+ kfree(p);
kfree(id);
if (net_ratelimit())
printk("OOM in bsalg (%d)\n", __LINE__);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index ec394cf5a19..676c80b5b14 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -204,6 +204,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
req->mss = mss;
ireq->rmt_port = th->source;
+ ireq->loc_port = th->dest;
ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
if (ipv6_opt_accepted(sk, skb) ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e5310c9b84d..b6b356b7912 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -476,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
fl.fl6_flowlabel = 0;
fl.oif = treq->iif;
fl.fl_ip_dport = inet_rsk(req)->rmt_port;
- fl.fl_ip_sport = inet_sk(sk)->sport;
+ fl.fl_ip_sport = inet_rsk(req)->loc_port;
security_req_classify_flow(req, &fl);
opt = np->opt;
@@ -1309,7 +1309,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
fl.oif = sk->sk_bound_dev_if;
fl.fl_ip_dport = inet_rsk(req)->rmt_port;
- fl.fl_ip_sport = inet_sk(sk)->sport;
+ fl.fl_ip_sport = inet_rsk(req)->loc_port;
security_req_classify_flow(req, &fl);
if (ip6_dst_lookup(sk, &dst, &fl))
@@ -1865,7 +1865,7 @@ static void get_openreq6(struct seq_file *seq,
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3],
- ntohs(inet_sk(sk)->sport),
+ ntohs(inet_rsk(req)->loc_port),
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3],
ntohs(inet_rsk(req)->rmt_port),
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 78892cf2b02..25dcef9f219 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -271,7 +271,6 @@ config NF_CONNTRACK_TFTP
config NF_CT_NETLINK
tristate 'Connection tracking netlink interface'
select NETFILTER_NETLINK
- depends on NF_NAT=n || NF_NAT
default m if NETFILTER_ADVANCED=n
help
This option enables support for a netlink-based userspace interface
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 05048e40326..79a69805221 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -25,11 +25,13 @@ menuconfig IP_VS
if IP_VS
config IP_VS_IPV6
- bool "IPv6 support for IPVS (DANGEROUS)"
+ bool "IPv6 support for IPVS"
depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
---help---
Add IPv6 support to IPVS. This is incomplete and might be dangerous.
+ See http://www.mindbasket.com/ipvs for more information.
+
Say N if unsure.
config IP_VS_DEBUG
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2e4ad9671e1..a040d46f85d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -813,6 +813,7 @@ out:
return err;
}
+#ifdef CONFIG_NF_NAT_NEEDED
static int
ctnetlink_parse_nat_setup(struct nf_conn *ct,
enum nf_nat_manip_type manip,
@@ -840,6 +841,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
return parse_nat_setup(ct, manip, attr);
}
+#endif
static int
ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 2cc1fff4930..f9977b3311f 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -48,7 +48,7 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
},
{
.name = "NFQUEUE",
- .family = NF_ARP,
+ .family = NFPROTO_ARP,
.target = nfqueue_tg,
.targetsize = sizeof(struct xt_NFQ_info),
.me = THIS_MODULE,
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 6f62c36948d..7ac54eab0b0 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -61,7 +61,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
if (info->flags & IPRANGE_SRC) {
m = ntohl(iph->saddr) < ntohl(info->src_min.ip);
m |= ntohl(iph->saddr) > ntohl(info->src_max.ip);
- m ^= info->flags & IPRANGE_SRC_INV;
+ m ^= !!(info->flags & IPRANGE_SRC_INV);
if (m) {
pr_debug("src IP " NIPQUAD_FMT " NOT in range %s"
NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -75,7 +75,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
if (info->flags & IPRANGE_DST) {
m = ntohl(iph->daddr) < ntohl(info->dst_min.ip);
m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip);
- m ^= info->flags & IPRANGE_DST_INV;
+ m ^= !!(info->flags & IPRANGE_DST_INV);
if (m) {
pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s"
NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -114,14 +114,14 @@ iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
if (info->flags & IPRANGE_SRC) {
m = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
- m ^= info->flags & IPRANGE_SRC_INV;
+ m ^= !!(info->flags & IPRANGE_SRC_INV);
if (m)
return false;
}
if (info->flags & IPRANGE_DST) {
m = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
- m ^= info->flags & IPRANGE_DST_INV;
+ m ^= !!(info->flags & IPRANGE_DST_INV);
if (m)
return false;
}
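The xt_iprange hunks above add !! because m is a bool while the
inversion flag lives in a higher bit (IPRANGE_SRC_INV is 1<<4 if I read
the header right): XOR-ing a 0/1 value with 0x10 and storing the result
back into a bool collapses to true either way, so the inverted match
could never fail. A short demonstration:

#include <stdio.h>
#include <stdbool.h>

#define IPRANGE_SRC_INV 0x10	/* assumed flag value, not 0 or 1 */

int main(void)
{
	unsigned int flags = IPRANGE_SRC_INV;
	int miss;

	for (miss = 0; miss <= 1; miss++) {
		bool buggy = miss;
		bool fixed = miss;

		/* Buggy: 0^0x10 and 1^0x10 are both nonzero, and the
		 * bool store collapses both to true. */
		buggy ^= flags & IPRANGE_SRC_INV;

		/* Fixed form from the hunk: !! normalises the flag bit
		 * to exactly 1, so the XOR really inverts. */
		fixed ^= !!(flags & IPRANGE_SRC_INV);

		printf("miss=%d buggy=%d fixed=%d\n", miss,
		       (int)buggy, (int)fixed);
	}
	return 0;	/* buggy prints 1 both times; fixed toggles */
}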
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 4ebd4ca9a99..280c471bcdf 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -318,15 +318,15 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
for (i = 0; i < ip_list_hash_size; i++)
INIT_LIST_HEAD(&t->iphash[i]);
#ifdef CONFIG_PROC_FS
- t->proc = proc_create(t->name, ip_list_perms, recent_proc_dir,
- &recent_mt_fops);
+ t->proc = proc_create_data(t->name, ip_list_perms, recent_proc_dir,
+ &recent_mt_fops, t);
if (t->proc == NULL) {
kfree(t);
goto out;
}
#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
- t->proc_old = proc_create(t->name, ip_list_perms, proc_old_dir,
- &recent_old_fops);
+ t->proc_old = proc_create_data(t->name, ip_list_perms, proc_old_dir,
+ &recent_old_fops, t);
if (t->proc_old == NULL) {
remove_proc_entry(t->name, proc_old_dir);
kfree(t);
@@ -334,11 +334,9 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
}
t->proc_old->uid = ip_list_uid;
t->proc_old->gid = ip_list_gid;
- t->proc_old->data = t;
#endif
t->proc->uid = ip_list_uid;
t->proc->gid = ip_list_gid;
- t->proc->data = t;
#endif
spin_lock_bh(&recent_lock);
list_add_tail(&t->list, &tables);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7b5572d6beb..93cd30ce650 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -326,6 +326,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
static struct netdev_queue noop_netdev_queue = {
.qdisc = &noop_qdisc,
+ .qdisc_sleeping = &noop_qdisc,
};
struct Qdisc noop_qdisc = {
@@ -352,6 +353,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
static struct Qdisc noqueue_qdisc;
static struct netdev_queue noqueue_netdev_queue = {
.qdisc = &noqueue_qdisc,
+ .qdisc_sleeping = &noqueue_qdisc,
};
static struct Qdisc noqueue_qdisc = {
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 46f23971f7e..5ba78701adc 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -1,5 +1,5 @@
/*
- * dev_cgroup.c - device cgroup subsystem
+ * device_cgroup.c - device cgroup subsystem
*
* Copyright 2007 IBM Corp
*/
@@ -10,6 +10,7 @@
#include <linux/list.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
+#include <linux/rcupdate.h>
#define ACC_MKNOD 1
#define ACC_READ 2
@@ -22,18 +23,8 @@
/*
* whitelist locking rules:
- * cgroup_lock() cannot be taken under dev_cgroup->lock.
- * dev_cgroup->lock can be taken with or without cgroup_lock().
- *
- * modifications always require cgroup_lock
- * modifications to a list which is visible require the
- * dev_cgroup->lock *and* cgroup_lock()
- * walking the list requires dev_cgroup->lock or cgroup_lock().
- *
- * reasoning: dev_whitelist_copy() needs to kmalloc, so needs
- * a mutex, which the cgroup_lock() is. Since modifying
- * a visible list requires both locks, either lock can be
- * taken for walking the list.
+ * hold cgroup_lock() for update/read.
+ * hold rcu_read_lock() for read.
*/
struct dev_whitelist_item {
@@ -47,7 +38,6 @@ struct dev_whitelist_item {
struct dev_cgroup {
struct cgroup_subsys_state css;
struct list_head whitelist;
- spinlock_t lock;
};
static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
@@ -84,13 +74,9 @@ static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig)
struct dev_whitelist_item *wh, *tmp, *new;
list_for_each_entry(wh, orig, list) {
- new = kmalloc(sizeof(*wh), GFP_KERNEL);
+ new = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
if (!new)
goto free_and_exit;
- new->major = wh->major;
- new->minor = wh->minor;
- new->type = wh->type;
- new->access = wh->access;
list_add_tail(&new->list, dest);
}
@@ -107,19 +93,16 @@ free_and_exit:
/* Stupid prototype - don't bother combining existing entries */
/*
* called under cgroup_lock()
- * since the list is visible to other tasks, we need the spinlock also
*/
static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
struct dev_whitelist_item *wh)
{
struct dev_whitelist_item *whcopy, *walk;
- whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL);
+ whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
if (!whcopy)
return -ENOMEM;
- memcpy(whcopy, wh, sizeof(*whcopy));
- spin_lock(&dev_cgroup->lock);
list_for_each_entry(walk, &dev_cgroup->whitelist, list) {
if (walk->type != wh->type)
continue;
@@ -135,7 +118,6 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
if (whcopy != NULL)
list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
- spin_unlock(&dev_cgroup->lock);
return 0;
}
@@ -149,14 +131,12 @@ static void whitelist_item_free(struct rcu_head *rcu)
/*
* called under cgroup_lock()
- * since the list is visible to other tasks, we need the spinlock also
*/
static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
struct dev_whitelist_item *wh)
{
struct dev_whitelist_item *walk, *tmp;
- spin_lock(&dev_cgroup->lock);
list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) {
if (walk->type == DEV_ALL)
goto remove;
@@ -174,7 +154,6 @@ remove:
call_rcu(&walk->rcu, whitelist_item_free);
}
}
- spin_unlock(&dev_cgroup->lock);
}
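whitelist_item_free() (named in the hunk header above) is the usual
container_of-and-kfree RCU callback; a sketch of that shape, assuming the
item embeds the "rcu" rcu_head passed to call_rcu() above:

/* Typical RCU free callback: recover the item, then kfree() it.
 * Safe because call_rcu() defers this until all readers are done. */
static void example_item_free(struct rcu_head *rcu)
{
	struct dev_whitelist_item *item =
		container_of(rcu, struct dev_whitelist_item, rcu);
	kfree(item);
}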
/*
@@ -214,7 +193,6 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
}
}
- spin_lock_init(&dev_cgroup->lock);
return &dev_cgroup->css;
}
@@ -330,15 +308,11 @@ static int parent_has_perm(struct dev_cgroup *childcg,
{
struct cgroup *pcg = childcg->css.cgroup->parent;
struct dev_cgroup *parent;
- int ret;
if (!pcg)
return 1;
parent = cgroup_to_devcgroup(pcg);
- spin_lock(&parent->lock);
- ret = may_access_whitelist(parent, wh);
- spin_unlock(&parent->lock);
- return ret;
+ return may_access_whitelist(parent, wh);
}
/*
@@ -357,17 +331,14 @@ static int parent_has_perm(struct dev_cgroup *childcg,
static int devcgroup_update_access(struct dev_cgroup *devcgroup,
int filetype, const char *buffer)
{
- struct dev_cgroup *cur_devcgroup;
const char *b;
char *endp;
- int retval = 0, count;
+ int count;
struct dev_whitelist_item wh;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- cur_devcgroup = task_devcgroup(current);
-
memset(&wh, 0, sizeof(wh));
b = buffer;
@@ -437,7 +408,6 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
}
handle:
- retval = 0;
switch (filetype) {
case DEVCG_ALLOW:
if (!parent_has_perm(devcgroup, &wh))
diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c
index 89b7f549beb..ea2bf82c937 100644
--- a/sound/core/pcm_misc.c
+++ b/sound/core/pcm_misc.c
@@ -319,6 +319,7 @@ EXPORT_SYMBOL(snd_pcm_format_physical_width);
/**
* snd_pcm_format_size - return the byte size of samples on the given format
* @format: the format to check
+ * @samples: the number of samples
*
* Returns the byte size of the given samples for the format, or a
* negative error code if the format is unknown.
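For example, four S16_LE samples (16 physical bits each) occupy 8 bytes
(illustrative usage):

	ssize_t bytes = snd_pcm_format_size(SNDRV_PCM_FORMAT_S16_LE, 4); /* = 8 */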
diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index e5e749f3e0e..73be7e14a60 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -51,7 +51,7 @@ static int emu10k1_playback_constraints(struct snd_pcm_runtime *runtime)
if (err < 0)
return err;
err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 256, UINT_MAX);
- if (err) < 0)
+ if (err < 0)
return err;
return 0;
}
diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c
index a7d89662acf..88fbf285d2b 100644
--- a/sound/pci/ca0106/ca0106_main.c
+++ b/sound/pci/ca0106/ca0106_main.c
@@ -759,7 +759,6 @@ static int snd_ca0106_pcm_prepare_playback(struct snd_pcm_substream *substream)
SPCS_CHANNELNUM_LEFT | SPCS_SOURCENUM_UNSPEC |
SPCS_GENERATIONSTATUS | 0x00001200 |
0x00000000 | SPCS_EMPHASIS_NONE | SPCS_COPYRIGHT );
- }
#endif
return 0;
diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c
index 20d0e328288..8f9e3859c37 100644
--- a/sound/ppc/snd_ps3.c
+++ b/sound/ppc/snd_ps3.c
@@ -666,6 +666,7 @@ static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
card->avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
card->avs.avs_audio_format = PS3AV_CMD_AUDIO_FORMAT_PCM;
card->avs.avs_audio_source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
+ memcpy(card->avs.avs_cs_info, ps3av_mode_cs_info, 8);
ret = snd_ps3_change_avsetting(card);
@@ -685,6 +686,7 @@ static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
{
struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
struct snd_ps3_avsetting_info avs;
+ int ret;
avs = card->avs;
@@ -729,19 +731,92 @@ static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
return 1;
}
- if ((card->avs.avs_audio_width != avs.avs_audio_width) ||
- (card->avs.avs_audio_rate != avs.avs_audio_rate)) {
- card->avs = avs;
- snd_ps3_change_avsetting(card);
+ memcpy(avs.avs_cs_info, ps3av_mode_cs_info, 8);
+ if (memcmp(&card->avs, &avs, sizeof(avs))) {
pr_debug("%s: after freq=%d width=%d\n", __func__,
card->avs.avs_audio_rate, card->avs.avs_audio_width);
- return 0;
+ card->avs = avs;
+ snd_ps3_change_avsetting(card);
+ ret = 0;
} else
+ ret = 1;
+
+ /* check CS non-audio bit and mute accordingly */
+ if (avs.avs_cs_info[0] & 0x02)
+ ps3av_audio_mute_analog(1); /* mute if non-audio */
+ else
+ ps3av_audio_mute_analog(0);
+
+ return ret;
+}
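The 0x02 test reads IEC958 channel-status byte 0, where ALSA's
<sound/asound.h> defines IEC958_AES0_NONAUDIO as (1 << 1); an equivalent,
self-documenting form of the check above would be:

	/* same test as above, spelled with the named constant */
	if (avs.avs_cs_info[0] & IEC958_AES0_NONAUDIO)
		ps3av_audio_mute_analog(1);	/* non-PCM stream: mute analog */
	else
		ps3av_audio_mute_analog(0);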
+
+/*
+ * SPDIF status bits controls
+ */
+static int snd_ps3_spdif_mask_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_IEC958;
+ uinfo->count = 1;
+ return 0;
+}
+
+/* FIXME: ps3av_set_audio_mode() assumes only consumer mode */
+static int snd_ps3_spdif_cmask_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ memset(ucontrol->value.iec958.status, 0xff, 8);
+ return 0;
+}
+
+static int snd_ps3_spdif_pmask_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ return 0;
+}
+
+static int snd_ps3_spdif_default_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ memcpy(ucontrol->value.iec958.status, ps3av_mode_cs_info, 8);
+ return 0;
+}
+
+static int snd_ps3_spdif_default_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ if (memcmp(ps3av_mode_cs_info, ucontrol->value.iec958.status, 8)) {
+ memcpy(ps3av_mode_cs_info, ucontrol->value.iec958.status, 8);
return 1;
+ }
+ return 0;
}
+static struct snd_kcontrol_new spdif_ctls[] = {
+ {
+ .access = SNDRV_CTL_ELEM_ACCESS_READ,
+ .iface = SNDRV_CTL_ELEM_IFACE_PCM,
+ .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK),
+ .info = snd_ps3_spdif_mask_info,
+ .get = snd_ps3_spdif_cmask_get,
+ },
+ {
+ .access = SNDRV_CTL_ELEM_ACCESS_READ,
+ .iface = SNDRV_CTL_ELEM_IFACE_PCM,
+ .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PRO_MASK),
+ .info = snd_ps3_spdif_mask_info,
+ .get = snd_ps3_spdif_pmask_get,
+ },
+ {
+ .iface = SNDRV_CTL_ELEM_IFACE_PCM,
+ .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT),
+ .info = snd_ps3_spdif_mask_info,
+ .get = snd_ps3_spdif_default_get,
+ .put = snd_ps3_spdif_default_put,
+ },
+};
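Two ALSA conventions are relied on here: a control template with no .access
field defaults to read/write in snd_ctl_new1(), and a put handler returns 1
when the value actually changed (so the core notifies listeners) or 0 when it
did not. A minimal skeleton of that put convention (illustrative, names
hypothetical):

static int example_value;

static int example_ctl_put(struct snd_kcontrol *kcontrol,
			   struct snd_ctl_elem_value *ucontrol)
{
	int v = ucontrol->value.integer.value[0];

	if (v != example_value) {
		example_value = v;
		return 1;	/* changed: core emits a value-change event */
	}
	return 0;		/* unchanged */
}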
static int snd_ps3_map_mmio(void)
@@ -842,7 +917,7 @@ static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
{
- int ret;
+ int i, ret;
u64 lpar_addr, lpar_size;
BUG_ON(!firmware_has_feature(FW_FEATURE_PS3_LV1));
@@ -903,6 +978,15 @@ static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
strcpy(the_card.card->driver, "PS3");
strcpy(the_card.card->shortname, "PS3");
strcpy(the_card.card->longname, "PS3 sound");
+
+ /* create control elements */
+ for (i = 0; i < ARRAY_SIZE(spdif_ctls); i++) {
+ ret = snd_ctl_add(the_card.card,
+ snd_ctl_new1(&spdif_ctls[i], &the_card));
+ if (ret < 0)
+ goto clean_card;
+ }
+
/* create PCM devices instance */
/* NOTE: this driver works assuming pcm:substream = 1:1 */
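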
ret = snd_pcm_new(the_card.card,
diff --git a/sound/ppc/snd_ps3.h b/sound/ppc/snd_ps3.h
index 4b7e6fbbe50..326fb29e82d 100644
--- a/sound/ppc/snd_ps3.h
+++ b/sound/ppc/snd_ps3.h
@@ -51,6 +51,7 @@ struct snd_ps3_avsetting_info {
uint32_t avs_audio_width;
uint32_t avs_audio_format; /* fixed */
uint32_t avs_audio_source; /* fixed */
+ unsigned char avs_cs_info[8];
};
/*
* PS3 audio 'card' instance
diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index 0a063a98a66..853b33ae343 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -43,6 +43,7 @@
struct omap_mcbsp_data {
unsigned int bus_id;
struct omap_mcbsp_reg_cfg regs;
+ unsigned int fmt;
/*
* Flags indicating whether the bus is already activated and configured by
* another substream
@@ -200,6 +201,7 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
struct omap_mcbsp_data *mcbsp_data = to_mcbsp(cpu_dai->private_data);
struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs;
int dma, bus_id = mcbsp_data->bus_id, id = cpu_dai->id;
+ int wlen;
unsigned long port;
if (cpu_class_is_omap1()) {
@@ -244,19 +246,29 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
switch (params_format(params)) {
case SNDRV_PCM_FORMAT_S16_LE:
/* Set word lengths */
+ wlen = 16;
regs->rcr2 |= RWDLEN2(OMAP_MCBSP_WORD_16);
regs->rcr1 |= RWDLEN1(OMAP_MCBSP_WORD_16);
regs->xcr2 |= XWDLEN2(OMAP_MCBSP_WORD_16);
regs->xcr1 |= XWDLEN1(OMAP_MCBSP_WORD_16);
- /* Set FS period and length in terms of bit clock periods */
- regs->srgr2 |= FPER(16 * 2 - 1);
- regs->srgr1 |= FWID(16 - 1);
break;
default:
/* Unsupported PCM format */
return -EINVAL;
}
+ /* Set FS period and length in terms of bit clock periods */
+ switch (mcbsp_data->fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_I2S:
+ regs->srgr2 |= FPER(wlen * 2 - 1);
+ regs->srgr1 |= FWID(wlen - 1);
+ break;
+ case SND_SOC_DAIFMT_DSP_A:
+ regs->srgr2 |= FPER(wlen * 2 - 1);
+ regs->srgr1 |= FWID(0);
+ break;
+ }
+
omap_mcbsp_config(bus_id, &mcbsp_data->regs);
mcbsp_data->configured = 1;
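Worked numbers for the only word length accepted above (wlen = 16), as a
comment (illustrative):

	/*
	 * I2S:   FPER(16 * 2 - 1) = FPER(31) -> 32-bit-clock frame;
	 *        FWID(16 - 1)     = FWID(15) -> FS held for half the
	 *        frame (16 bit clocks), as I2S expects.
	 * DSP_A: same 32-clock frame, but FWID(0) -> a single-bit-clock
	 *        FS pulse, as DSP/TDM framing expects.
	 */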
@@ -272,10 +284,12 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
{
struct omap_mcbsp_data *mcbsp_data = to_mcbsp(cpu_dai->private_data);
struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs;
+ unsigned int temp_fmt = fmt;
if (mcbsp_data->configured)
return 0;
+ mcbsp_data->fmt = fmt;
memset(regs, 0, sizeof(*regs));
/* Generic McBSP register settings */
regs->spcr2 |= XINTM(3) | FREE;
@@ -293,6 +307,8 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
/* 0-bit data delay */
regs->rcr2 |= RDATDLY(0);
regs->xcr2 |= XDATDLY(0);
+ /* Invert bit clock and FS polarity configuration for DSP_A */
+ temp_fmt ^= SND_SOC_DAIFMT_IB_IF;
break;
default:
/* Unsupported data format */
@@ -316,7 +332,7 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
}
/* Set bit clock (CLKX/CLKR) and FS polarities */
- switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+ switch (temp_fmt & SND_SOC_DAIFMT_INV_MASK) {
case SND_SOC_DAIFMT_NB_NF:
/*
* Normal BCLK + FS.